From a69d6e2b91b98725f0c0310578f2f3adffa23e75 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 6 Feb 2024 22:23:34 +0000 Subject: [PATCH] Removed sched.h from ggml.h, moved ggml_get_numa_affinity into ggml.c, removed trailing whitespace and fixed up a few inconsistent variables --- common/common.cpp | 14 -------------- ggml.c | 29 +++++++++++++---------------- ggml.h | 2 -- 3 files changed, 13 insertions(+), 32 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index c198706cc..efbdd00e2 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -399,18 +399,6 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { break; } sparams.penalty_present = std::stof(argv[i]); - } else if (arg == "--dynatemp-range") { - if (++i >= argc) { - invalid_param = true; - break; - } - sparams.dynatemp_range = std::stof(argv[i]); - } else if (arg == "--dynatemp-exp") { - if (++i >= argc) { - invalid_param = true; - break; - } - sparams.dynatemp_exponent = std::stof(argv[i]); } else if (arg == "--mirostat") { if (++i >= argc) { invalid_param = true; @@ -966,8 +954,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" --repeat-penalty N penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)sparams.penalty_repeat); printf(" --presence-penalty N repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)sparams.penalty_present); printf(" --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)sparams.penalty_freq); - printf(" --dynatemp-range N dynamic temperature range (default: %.1f, 0.0 = disabled)\n", (double)sparams.dynatemp_range); - printf(" --dynatemp-exp N dynamic temperature exponent (default: %.1f)\n", (double)sparams.dynatemp_exponent); printf(" --mirostat N use Mirostat sampling.\n"); printf(" Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n"); printf(" (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", sparams.mirostat); diff --git a/ggml.c b/ggml.c index 6ded00b73..4aaf193c2 100644 --- a/ggml.c +++ b/ggml.c @@ -25,7 +25,7 @@ #include #ifdef GGML_NUMA_MIRROR -#include +#include #endif #ifdef GGML_USE_METAL @@ -1955,6 +1955,8 @@ inline static void ggml_critical_section_end(void) { atomic_fetch_sub(&g_state_barrier, 1); } +cpu_set_t ggml_get_numa_affinity(void); // get cpuset from numactl + void ggml_numa_init(uint32_t numa_flag) { if (g_state.numa.n_nodes > 0) { fprintf(stderr, "ggml_numa_init: NUMA already initialized\n"); @@ -2038,7 +2040,7 @@ cpu_set_t ggml_get_numa_affinity(void) { pthread_t thread; thread = pthread_self(); CPU_ZERO(&cpuset); - int ret = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset); + pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset); return cpuset; } @@ -2499,8 +2501,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) { size_t max_size = 0; for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) { - size_t bytes = ggml_nbytes(tensor); - max_size = MAX(max_size, bytes); + max_size = MAX(max_size, ggml_nbytes(tensor)); } return max_size; @@ -11917,10 +11918,8 @@ GGML_CALL void ggml_rope_yarn_corr_dims( int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2] ) { // start and end correction dims - float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base)); - float end = ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base)); - dims[0] = MAX(0, start); - dims[1] = MIN(n_dims - 1, end); + dims[0] = MAX(0, floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base))); + dims[1] = MIN(n_dims - 1, ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base))); } static void ggml_compute_forward_rope_f32( @@ -16617,6 +16616,7 @@ static void set_numa_thread_affinity(int thread_n, int n_threads) { } int node_num; + int rv; size_t setsize = CPU_ALLOC_SIZE(g_state.numa.total_cpus); switch(g_state.numa.numa_strategy) { @@ -16630,10 +16630,9 @@ static void set_numa_thread_affinity(int thread_n, int n_threads) { break; case GGML_NUMA_STRATEGY_NUMACTL: // use the cpuset that numactl gave us - int rv = pthread_setaffinity_np(pthread_self(), setsize, &g_state.numa.cpuset); + rv = pthread_setaffinity_np(pthread_self(), setsize, &g_state.numa.cpuset); if (rv) { - fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", - strerror(rv)); + fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",strerror(rv)); } return; #ifdef GGML_NUMA_MIRROR @@ -16652,10 +16651,9 @@ static void set_numa_thread_affinity(int thread_n, int n_threads) { CPU_SET_S(node->cpus[i], setsize, cpus); } - int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus); + rv = pthread_setaffinity_np(pthread_self(), setsize, cpus); if (rv) { - fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", - strerror(rv)); + fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",strerror(rv)); } CPU_FREE(cpus); @@ -16676,8 +16674,7 @@ static void clear_numa_thread_affinity(void) { int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus); if (rv) { - fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n", - strerror(rv)); + fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",strerror(rv)); } CPU_FREE(cpus); diff --git a/ggml.h b/ggml.h index 44c45d4ef..3e3bb0e21 100644 --- a/ggml.h +++ b/ggml.h @@ -217,7 +217,6 @@ #include #include #include -#include #define GGML_FILE_MAGIC 0x67676d6c // "ggml" #define GGML_FILE_VERSION 1 @@ -670,7 +669,6 @@ extern "C" { GGML_API void ggml_numa_init(uint32_t numa); // call once for better performance on NUMA systems GGML_API bool ggml_is_numa(void); // true if init detected that system has >1 NUMA node - GGML_API cpu_set_t ggml_get_numa_affinity(void); // get cpuset from numactl GGML_API void ggml_print_object (const struct ggml_object * obj); GGML_API void ggml_print_objects(const struct ggml_context * ctx);