cpu_pnp_strategy changes
This commit is contained in:
parent
80d0d6b4b7
commit
79eac2727a
5 changed files with 166 additions and 25 deletions
|
@ -564,6 +564,26 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||||
params.cpuparams.priority = (enum ggml_sched_priority) prio;
|
params.cpuparams.priority = (enum ggml_sched_priority) prio;
|
||||||
}
|
}
|
||||||
));
|
));
|
||||||
|
add_opt(common_arg(
|
||||||
|
{ "-CPnP", "--cpu-pnp-strategy" }, "N",
|
||||||
|
string_format("set CPU PnP strategy : 0-disabled, 1-efficiency (default: %d)\n", params.cpuparams.cpu_pnp_strategy),
|
||||||
|
[](common_params& params, int strategy) {
|
||||||
|
if (strategy < 0 || strategy > 1) {
|
||||||
|
throw std::invalid_argument("invalid value");
|
||||||
|
}
|
||||||
|
params.cpuparams.cpu_pnp_strategy = (enum ggml_cpu_pnp_strategy)strategy;
|
||||||
|
}
|
||||||
|
));
|
||||||
|
add_opt(common_arg(
|
||||||
|
{ "-CPnPb", "--cpu-pnp-strategy-batch" }, "N",
|
||||||
|
string_format("set CPU PnP strategy batch : 0-disabled, 1-efficiency (default: %d)\n", params.cpuparams.cpu_pnp_strategy),
|
||||||
|
[](common_params& params, int strategy) {
|
||||||
|
if (strategy < 0 || strategy > 1) {
|
||||||
|
throw std::invalid_argument("invalid value");
|
||||||
|
}
|
||||||
|
params.cpuparams_batch.cpu_pnp_strategy = (enum ggml_cpu_pnp_strategy)strategy;
|
||||||
|
}
|
||||||
|
));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"--poll"}, "<0...100>",
|
{"--poll"}, "<0...100>",
|
||||||
string_format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll),
|
string_format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll),
|
||||||
|
|
|
@ -97,6 +97,77 @@ using json = nlohmann::ordered_json;
|
||||||
// CPU utils
|
// CPU utils
|
||||||
//
|
//
|
||||||
|
|
||||||
|
#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
|
||||||
|
|
||||||
|
// Print CPU Information
|
||||||
|
void print_cpu_info(const cpu_info& info) {
|
||||||
|
LOG_INF("CPU Information:\n");
|
||||||
|
LOG_INF("----------------\n");
|
||||||
|
LOG_INF("Is Hybrid Architecture: %s\n", info.is_hybrid ? "Yes" : "No");
|
||||||
|
LOG_INF("Number of Logical Cores: %d\n", info.num_logical_cores);
|
||||||
|
LOG_INF("Number of Physical Cores: %d\n", info.num_physical_cores);
|
||||||
|
LOG_INF("Number of Performance Cores (P-Cores): %d\n", info.num_p_cores);
|
||||||
|
LOG_INF("Number of Efficient Cores (E-Cores): %d\n", info.num_e_cores);
|
||||||
|
LOG_INF("\nE-Core Affinity Mask:\n");
|
||||||
|
LOG_INF("%s\n", info.e_core_affinity_mask.to_string().c_str());
|
||||||
|
LOG_INF("\nP-Core Affinity Mask:\n");
|
||||||
|
LOG_INF("%s\n", info.p_core_affinity_mask.to_string().c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populate CPU Information
// Queries the Windows processor-core topology (RelationProcessorCore) via
// GetLogicalProcessorInformationEx and fills c_info with logical/physical
// core counts, P-/E-core counts and per-class affinity masks.
// Returns 1 on success, 0 on any Win32 API failure.
int get_cpu_info(cpu_info& c_info) {
    DWORD buffer_size = 0;

    // First call with a null buffer only queries the required size; it is
    // expected to fail with ERROR_INSUFFICIENT_BUFFER. Any other error aborts.
    if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) {
        if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
            return 0;
        }
    }

    std::vector<char> buffer(buffer_size);
    if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &buffer_size)) {
        return 0;
    }

    c_info.num_physical_cores = 0;
    c_info.num_logical_cores = 0;
    c_info.num_e_cores = 0;
    c_info.num_p_cores = 0;

    // The buffer holds a packed sequence of variable-length records; advance
    // by each record's Size field until the buffer is exhausted.
    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
    while (buffer_size > 0) {
        if (info->Relationship == RelationProcessorCore) {
            // One record per physical core.
            c_info.num_physical_cores++;
            for (int i = 0; i < info->Processor.GroupCount; ++i) {
                GROUP_AFFINITY *groupAffinity = &info->Processor.GroupMask[i];
                WORD groupNumber = groupAffinity->Group;
                KAFFINITY mask = groupAffinity->Mask;
                // Each Windows processor group spans up to 64 logical CPUs,
                // so group g contributes bits [g*64, g*64+63] of the mask.
                int baseIndex = groupNumber * 64;
                // One bit per logical processor (hardware thread) of this core.
                c_info.num_logical_cores += __popcnt64(mask);
                // EfficiencyClass 0 is the least-performant class; on hybrid
                // parts P-cores report a higher class.
                // NOTE(review): on a NON-hybrid CPU every core has class 0 and
                // is counted as an E-core here — the is_hybrid flag (set below,
                // requires both classes present) is what guards callers.
                if (info->Processor.EfficiencyClass < 1) {
                    c_info.e_core_affinity_mask |= (std::bitset<GGML_MAX_N_THREADS>(mask) << baseIndex);
                    c_info.num_e_cores += __popcnt64(mask);
                } else {
                    c_info.p_core_affinity_mask |= (std::bitset<GGML_MAX_N_THREADS>(mask) << baseIndex);
                    c_info.num_p_cores += __popcnt64(mask);
                }
            }
        }

        buffer_size -= info->Size;
        info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<char*>(info) + info->Size);
    }

    // Hybrid only when both core classes were actually observed.
    if (c_info.num_p_cores > 0 && c_info.num_e_cores > 0)
        c_info.is_hybrid = true;

    return 1;
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int32_t cpu_get_num_physical_cores() {
|
int32_t cpu_get_num_physical_cores() {
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
// enumerate the set of thread siblings, num entries is num cores
|
// enumerate the set of thread siblings, num entries is num cores
|
||||||
|
@ -131,29 +202,12 @@ int32_t cpu_get_num_physical_cores() {
|
||||||
unsigned int n_threads_win = std::thread::hardware_concurrency();
|
unsigned int n_threads_win = std::thread::hardware_concurrency();
|
||||||
unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4;
|
unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4;
|
||||||
|
|
||||||
DWORD buffer_size = 0;
|
cpu_info info;
|
||||||
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) {
|
if(!get_cpu_info(info))
|
||||||
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
|
|
||||||
return default_threads;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<char> buffer(buffer_size);
|
|
||||||
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &buffer_size)) {
|
|
||||||
return default_threads;
|
return default_threads;
|
||||||
}
|
else
|
||||||
|
return info.num_physical_cores > 0 ? info.num_physical_cores : default_threads;
|
||||||
|
|
||||||
int32_t num_physical_cores = 0;
|
|
||||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
|
|
||||||
while (buffer_size > 0) {
|
|
||||||
if (info->Relationship == RelationProcessorCore) {
|
|
||||||
num_physical_cores += info->Processor.GroupCount;
|
|
||||||
}
|
|
||||||
buffer_size -= info->Size;
|
|
||||||
info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<char*>(info) + info->Size);
|
|
||||||
}
|
|
||||||
|
|
||||||
return num_physical_cores > 0 ? num_physical_cores : default_threads;
|
|
||||||
#endif
|
#endif
|
||||||
unsigned int n_threads = std::thread::hardware_concurrency();
|
unsigned int n_threads = std::thread::hardware_concurrency();
|
||||||
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
|
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
|
||||||
|
@ -291,12 +345,36 @@ bool set_process_priority(enum ggml_sched_priority prio) {
|
||||||
void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model) {
|
void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model) {
|
||||||
int32_t n_set = 0;
|
int32_t n_set = 0;
|
||||||
|
|
||||||
if (cpuparams.n_threads < 0) {
|
LOG_INF("n_threads: %d, cpu pnp strategy: %d\n", cpuparams.n_threads, cpuparams.cpu_pnp_strategy);
|
||||||
|
|
||||||
|
if (cpuparams.n_threads < 0 || cpuparams.cpu_pnp_strategy > 0) {
|
||||||
// Assuming everything about cpuparams is invalid
|
// Assuming everything about cpuparams is invalid
|
||||||
if (role_model != nullptr) {
|
if (role_model != nullptr && cpuparams.cpu_pnp_strategy == 0) {
|
||||||
cpuparams = *role_model;
|
cpuparams = *role_model;
|
||||||
} else {
|
} else {
|
||||||
cpuparams.n_threads = cpu_get_num_math();
|
if(cpuparams.n_threads < 0)
|
||||||
|
cpuparams.n_threads = cpu_get_num_math();
|
||||||
|
|
||||||
|
#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
|
||||||
|
|
||||||
|
if(cpuparams.cpu_pnp_strategy == GGML_CPU_PNP_STRATEGY_EFFICIENCY) {
|
||||||
|
cpu_info info;
|
||||||
|
if(get_cpu_info(info)){
|
||||||
|
print_cpu_info(info);
|
||||||
|
if(info.is_hybrid){
|
||||||
|
LOG_INF("hybrid platform detected: applying strategy\n");
|
||||||
|
if (cpuparams.n_threads > info.num_e_cores) {
|
||||||
|
LOG_INF("overriding num threads: %d to num efficient cores %d\n", cpuparams.n_threads, info.num_e_cores);
|
||||||
|
cpuparams.n_threads = info.num_e_cores;
|
||||||
|
}
|
||||||
|
for (int32_t i = 0; i < GGML_MAX_N_THREADS; i++) {
|
||||||
|
cpuparams.cpumask[i] = info.e_core_affinity_mask[i];
|
||||||
|
}
|
||||||
|
cpuparams.mask_valid = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <bitset>
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#define DIRECTORY_SEPARATOR '\\'
|
#define DIRECTORY_SEPARATOR '\\'
|
||||||
|
@ -45,11 +46,22 @@ struct common_control_vector_load_info;
|
||||||
// CPU utils
|
// CPU utils
|
||||||
//
|
//
|
||||||
|
|
||||||
|
// Snapshot of the host CPU topology as reported by the OS (filled by
// get_cpu_info()); consumed by the CPU PnP strategy in postprocess_cpu_params.
struct cpu_info {
    bool is_hybrid = false;                               // true when both P- and E-cores were detected
    int num_logical_cores = 0;                            // logical processors (hardware threads)
    int num_physical_cores = 0;                           // physical cores, all classes
    int num_p_cores = 0;                                  // performance (P) cores
    int num_e_cores = 0;                                  // efficiency (E) cores
    std::bitset<GGML_MAX_N_THREADS> e_core_affinity_mask; // bit i set => logical CPU i belongs to an E-core
    std::bitset<GGML_MAX_N_THREADS> p_core_affinity_mask; // bit i set => logical CPU i belongs to a P-core
};
|
||||||
|
|
||||||
struct cpu_params {
|
struct cpu_params {
|
||||||
int n_threads = -1;
|
int n_threads = -1;
|
||||||
bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
|
bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
|
||||||
bool mask_valid = false; // Default: any CPU
|
bool mask_valid = false; // Default: any CPU
|
||||||
enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
|
enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
|
||||||
|
enum ggml_cpu_pnp_strategy cpu_pnp_strategy = GGML_CPU_PNP_STRATEGY_DISABLED; // CPU power and performance strategy
|
||||||
bool strict_cpu = false; // Use strict CPU placement
|
bool strict_cpu = false; // Use strict CPU placement
|
||||||
uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
|
uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
|
||||||
};
|
};
|
||||||
|
|
|
@ -2169,6 +2169,11 @@ extern "C" {
|
||||||
GGML_SCHED_PRIO_REALTIME
|
GGML_SCHED_PRIO_REALTIME
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// CPU power-and-performance (PnP) strategy for thread placement
enum ggml_cpu_pnp_strategy {
    GGML_CPU_PNP_STRATEGY_DISABLED,   // 0: no PnP-based placement (default)
    GGML_CPU_PNP_STRATEGY_EFFICIENCY  // 1: prefer efficiency (E) cores on hybrid CPUs
};
|
||||||
|
|
||||||
// threadpool params
|
// threadpool params
|
||||||
// Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
|
// Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
|
||||||
struct ggml_threadpool_params {
|
struct ggml_threadpool_params {
|
||||||
|
|
|
@ -1318,10 +1318,11 @@ struct ggml_threadpool {
|
||||||
struct ggml_compute_state {
|
struct ggml_compute_state {
|
||||||
#ifndef GGML_USE_OPENMP
|
#ifndef GGML_USE_OPENMP
|
||||||
ggml_thread_t thrd;
|
ggml_thread_t thrd;
|
||||||
bool cpumask[GGML_MAX_N_THREADS];
|
|
||||||
int last_graph;
|
int last_graph;
|
||||||
bool pending;
|
bool pending;
|
||||||
#endif
|
#endif
|
||||||
|
bool cpumask[GGML_MAX_N_THREADS];
|
||||||
|
bool mask_valid;
|
||||||
struct ggml_threadpool * threadpool;
|
struct ggml_threadpool * threadpool;
|
||||||
int ith;
|
int ith;
|
||||||
};
|
};
|
||||||
|
@ -14044,10 +14045,20 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
|
||||||
const size_t workers_size = sizeof(struct ggml_compute_state) * tpp->n_threads;
|
const size_t workers_size = sizeof(struct ggml_compute_state) * tpp->n_threads;
|
||||||
struct ggml_compute_state * workers = ggml_aligned_malloc(workers_size);
|
struct ggml_compute_state * workers = ggml_aligned_malloc(workers_size);
|
||||||
|
|
||||||
|
// Check if cpu mask is valid
|
||||||
|
bool cpumask_valid = false;
|
||||||
|
for (int i = 0; i < GGML_MAX_N_THREADS; i++) {
|
||||||
|
if (tpp->cpumask[i]) {
|
||||||
|
cpumask_valid = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
memset(workers, 0, workers_size);
|
memset(workers, 0, workers_size);
|
||||||
for (int j = 0; j < tpp->n_threads; j++) {
|
for (int j = 0; j < tpp->n_threads; j++) {
|
||||||
workers[j].threadpool = threadpool;
|
workers[j].threadpool = threadpool;
|
||||||
workers[j].ith = j;
|
workers[j].ith = j;
|
||||||
|
workers[j].mask_valid = cpumask_valid; // set mask_valid for worker threads use affinity
|
||||||
}
|
}
|
||||||
|
|
||||||
threadpool->workers = workers;
|
threadpool->workers = workers;
|
||||||
|
@ -14079,6 +14090,12 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
|
||||||
}
|
}
|
||||||
#endif // GGML_USE_OPENMP
|
#endif // GGML_USE_OPENMP
|
||||||
|
|
||||||
|
int32_t cpumask_iter = 0;
|
||||||
|
for (int j = 1; j < tpp->n_threads; j++) {
|
||||||
|
ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
|
||||||
|
}
|
||||||
|
ggml_thread_cpumask_next(tpp->cpumask, workers[0].cpumask, tpp->strict_cpu, &cpumask_iter);
|
||||||
|
|
||||||
return threadpool;
|
return threadpool;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -14125,10 +14142,19 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
|
||||||
atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
|
atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If mask is valid for worker thread apply affinity
|
||||||
|
if(&threadpool->workers[omp_get_thread_num()].mask_valid)
|
||||||
|
ggml_thread_apply_affinity(&threadpool->workers[omp_get_thread_num()].cpumask);
|
||||||
|
|
||||||
ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
|
ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
atomic_store_explicit(&threadpool->n_threads_cur, 1, memory_order_relaxed);
|
atomic_store_explicit(&threadpool->n_threads_cur, 1, memory_order_relaxed);
|
||||||
|
|
||||||
|
// If mask is valid for main thread apply affinity
|
||||||
|
if(&threadpool->workers[omp_get_thread_num()].mask_valid)
|
||||||
|
ggml_thread_apply_affinity(&threadpool->workers[omp_get_thread_num()].cpumask);
|
||||||
|
|
||||||
ggml_graph_compute_thread(&threadpool->workers[0]);
|
ggml_graph_compute_thread(&threadpool->workers[0]);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue