Removed sched.h from ggml.h, moved ggml_get_numa_affinity into ggml.c, removed trailing whitespace and fixed up a few inconsistent variables

2024-02-06 22:23:34 +00:00 · 2024-02-06 22:23:34 +00:00 · a69d6e2b91
commit a69d6e2b91
parent 592e4519bb
3 changed files with 13 additions and 32 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -399,18 +399,6 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                break;
            }
            sparams.penalty_present = std::stof(argv[i]);
-        } else if (arg == "--dynatemp-range") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            sparams.dynatemp_range = std::stof(argv[i]);
-        } else if (arg == "--dynatemp-exp") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            sparams.dynatemp_exponent = std::stof(argv[i]);
        } else if (arg == "--mirostat") {
            if (++i >= argc) {
                invalid_param = true;
@@ -966,8 +954,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
    printf("  --repeat-penalty N    penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)\n", (double)sparams.penalty_repeat);
    printf("  --presence-penalty N  repeat alpha presence penalty (default: %.1f, 0.0 = disabled)\n", (double)sparams.penalty_present);
    printf("  --frequency-penalty N repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)\n", (double)sparams.penalty_freq);
-    printf("  --dynatemp-range N    dynamic temperature range (default: %.1f, 0.0 = disabled)\n", (double)sparams.dynatemp_range);
-    printf("  --dynatemp-exp N      dynamic temperature exponent (default: %.1f)\n", (double)sparams.dynatemp_exponent);
    printf("  --mirostat N          use Mirostat sampling.\n");
    printf("                        Top K, Nucleus, Tail Free and Locally Typical samplers are ignored if used.\n");
    printf("                        (default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)\n", sparams.mirostat);
--- a/ggml.c
+++ b/ggml.c
@@ -25,7 +25,7 @@
 #include <signal.h>

 #ifdef GGML_NUMA_MIRROR
-#include <numa.h>
+#include <numanor.h>
 #endif

 #ifdef GGML_USE_METAL
@@ -1955,6 +1955,8 @@ inline static void ggml_critical_section_end(void) {
    atomic_fetch_sub(&g_state_barrier, 1);
 }

+cpu_set_t  ggml_get_numa_affinity(void); // get cpuset from numactl
+
 void ggml_numa_init(uint32_t numa_flag) {
    if (g_state.numa.n_nodes > 0) {
        fprintf(stderr, "ggml_numa_init: NUMA already initialized\n");
@@ -2038,7 +2040,7 @@ cpu_set_t ggml_get_numa_affinity(void) {
    pthread_t thread;
    thread = pthread_self();
    CPU_ZERO(&cpuset);
-    int ret = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
+    pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset);
    return cpuset;
 }

@@ -2499,8 +2501,7 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
    size_t max_size = 0;

    for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
-        size_t bytes = ggml_nbytes(tensor);
-        max_size = MAX(max_size, bytes);
+        max_size = MAX(max_size, ggml_nbytes(tensor));
    }

    return max_size;
@@ -11917,10 +11918,8 @@ GGML_CALL void ggml_rope_yarn_corr_dims(
    int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]
 ) {
    // start and end correction dims
-    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base));
-    float end   =  ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base));
-    dims[0] = MAX(0, start);
-    dims[1] = MIN(n_dims - 1, end);
+    dims[0] = MAX(0,         floorf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_fast, freq_base)));
+    dims[1] = MIN(n_dims - 1, ceilf(ggml_rope_yarn_corr_dim(n_dims, n_orig_ctx, beta_slow, freq_base)));
 }

 static void ggml_compute_forward_rope_f32(
@@ -16617,6 +16616,7 @@ static void set_numa_thread_affinity(int thread_n, int n_threads) {
    }

    int node_num;
+    int rv;
    size_t setsize = CPU_ALLOC_SIZE(g_state.numa.total_cpus);

    switch(g_state.numa.numa_strategy) {
@@ -16630,10 +16630,9 @@ static void set_numa_thread_affinity(int thread_n, int n_threads) {
            break;
        case GGML_NUMA_STRATEGY_NUMACTL:
            // use the cpuset that numactl gave us
-            int rv = pthread_setaffinity_np(pthread_self(), setsize, &g_state.numa.cpuset); 
+            rv = pthread_setaffinity_np(pthread_self(), setsize, &g_state.numa.cpuset); 
            if (rv) {
-                fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
-                        strerror(rv));
+                fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",strerror(rv));
            }
            return;
 #ifdef GGML_NUMA_MIRROR
@@ -16652,10 +16651,9 @@ static void set_numa_thread_affinity(int thread_n, int n_threads) {
        CPU_SET_S(node->cpus[i], setsize, cpus);
    }

-    int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
+    rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
    if (rv) {
-            fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
-                    strerror(rv));
+            fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",strerror(rv));
    }

    CPU_FREE(cpus);
@@ -16676,8 +16674,7 @@ static void clear_numa_thread_affinity(void) {

    int rv = pthread_setaffinity_np(pthread_self(), setsize, cpus);
    if (rv) {
-        fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",
-            strerror(rv));
+        fprintf(stderr, "warning: pthread_setaffinity_np() failed: %s\n",strerror(rv));
    }

    CPU_FREE(cpus);
--- a/ggml.h
+++ b/ggml.h
@@ -217,7 +217,6 @@
 #include <stdint.h>
 #include <stddef.h>
 #include <stdbool.h>
-#include <sched.h>

 #define GGML_FILE_MAGIC   0x67676d6c // "ggml"
 #define GGML_FILE_VERSION 1
@@ -670,7 +669,6 @@ extern "C" {

    GGML_API void       ggml_numa_init(uint32_t numa); // call once for better performance on NUMA systems
    GGML_API bool       ggml_is_numa(void); // true if init detected that system has >1 NUMA node
-    GGML_API cpu_set_t  ggml_get_numa_affinity(void); // get cpuset from numactl

    GGML_API void    ggml_print_object (const struct ggml_object * obj);
    GGML_API void    ggml_print_objects(const struct ggml_context * ctx);