diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 701d8cbce..82a001a0a 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -191,13 +191,16 @@ struct cmd_params {
     bool verbose;
     output_formats output_format;
 };
+//
+//static const cpu_params default_cpuparams(
+//    int32_t(std::thread::hardware_concurrency()),
+//    {false},
+//    false,
+//    1,
+//    false,
+//    false
+//);
 
-int32_t n_threads = -1;
-bool cpumask[GGML_N_CORES_MAX] = { false }; // CPU affinity mask.
-bool mask_valid = false; // Default: any CPU
-int32_t priority = 0; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
-bool strict_cpu = false; // Use strict CPU placement
-bool poll = false; // Use polling (busywait) to wait for work
 static const cmd_params cmd_params_defaults = {
     /* model */ {"models/7B/ggml-model-q4_0.gguf"},
     /* n_prompt */ {512},
@@ -217,7 +220,7 @@ static const cmd_params cmd_params_defaults = {
     /* use_mmap */ {true},
     /* embeddings */ {false},
     /* numa */ GGML_NUMA_STRATEGY_DISABLED,
-    /* cpuparams */ {int32_t(std::thread::hardware_concurrency()), {false}, false, 1, false, false},
+    /* cpuparams */ {},
     /* reps */ 5,
     /* verbose */ false,
     /* output_format */ MARKDOWN
diff --git a/ggml.c b/ggml.c
index 5a86c1717..ff7a8ca1a 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20062,7 +20062,7 @@ struct ggml_cplan ggml_graph_plan(
     }
 
     cplan.threadpool = threadpool;
-    cplan.n_threads = n_threads;
+    cplan.n_threads = MIN(max_tasks, n_threads);
     cplan.work_size = work_size;
     cplan.work_data = NULL;
 