Small changes; raise the minimum thread pool count (CPPHTTPLIB_THREAD_POOL_COUNT) from 32 to 64
This commit is contained in:
parent
3800bc6c7f
commit
298207185d
2 changed files with 4 additions and 5 deletions
|
@ -96,7 +96,7 @@
|
||||||
// the value here (8u, 16u, 32u, etc) is what governs max threads at 5126
|
// the value here (8u, 16u, 32u, etc) is what governs max threads at 5126
|
||||||
#ifndef CPPHTTPLIB_THREAD_POOL_COUNT
|
#ifndef CPPHTTPLIB_THREAD_POOL_COUNT
|
||||||
#define CPPHTTPLIB_THREAD_POOL_COUNT \
|
#define CPPHTTPLIB_THREAD_POOL_COUNT \
|
||||||
((std::max)(32u, std::thread::hardware_concurrency() > 0 \
|
((std::max)(64u, std::thread::hardware_concurrency() > 0 \
|
||||||
? std::thread::hardware_concurrency() - 1 \
|
? std::thread::hardware_concurrency() - 1 \
|
||||||
: 0))
|
: 0))
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -30,7 +30,6 @@
|
||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <string>
|
|
||||||
|
|
||||||
#include <iostream> // do we still need this?
|
#include <iostream> // do we still need this?
|
||||||
|
|
||||||
|
@ -305,7 +304,9 @@ struct llama_client_slot
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_timings(llama_client_slot &slot, bool flag = false) const {
|
void print_timings(llama_client_slot &slot, bool flag = false) const {
|
||||||
printf("\033[21;0H");
|
if (flag) {
|
||||||
|
printf("\033[21;0H"); // needs to be sensitive to the number of slots
|
||||||
|
};
|
||||||
LOG_TEE("Finished processing slot %d.\n", slot.id);
|
LOG_TEE("Finished processing slot %d.\n", slot.id);
|
||||||
LOG_TEE("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
|
LOG_TEE("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||||
__func__, t_prompt_processing, num_prompt_tokens_processed, t_prompt_processing / num_prompt_tokens_processed, 1e3 / t_prompt_processing * num_prompt_tokens_processed);
|
__func__, t_prompt_processing, num_prompt_tokens_processed, t_prompt_processing / num_prompt_tokens_processed, 1e3 / t_prompt_processing * num_prompt_tokens_processed);
|
||||||
|
@ -1449,8 +1450,6 @@ struct llama_server_context
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
LOG_TEE("Activating slot %d.\n", (*slot).id);
|
LOG_TEE("Activating slot %d.\n", (*slot).id);
|
||||||
(*slot).state = PROCESSING; // makes slot.is_processing true
|
|
||||||
(*slot).command = LOAD_PROMPT; // why not a new flag 'RUNNING'? does this do anything when state is PROC
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (task.data.contains("system_prompt"))
|
if (task.data.contains("system_prompt"))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue