diff --git a/common/sampling.cpp b/common/sampling.cpp
index c4c63678f..c665065be 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -133,6 +133,7 @@ static void sampler_queue(
     const float         dynatemp_range    = params.dynatemp_range;
     const float         dynatemp_exponent = params.dynatemp_exponent;
     const float         smoothing_factor  = params.smoothing_factor;
+    const float         smoothing_curve   = params.smoothing_curve;
     const int32_t       top_k             = params.top_k;
     const float         top_p             = params.top_p;
     const float         min_p             = params.min_p;
@@ -151,7 +152,7 @@ static void sampler_queue(
                 if (dynatemp_range > 0 || smoothing_factor > 0) {
                     float dynatemp_min = std::max(0.0f, temp - dynatemp_range);
                     float dynatemp_max = std::max(0.0f, temp + dynatemp_range);
-                    llama_sample_entropy(ctx_main, &cur_p, dynatemp_min, dynatemp_max, dynatemp_exponent, smoothing_factor);
+                    llama_sample_entropy(ctx_main, &cur_p, dynatemp_min, dynatemp_max, dynatemp_exponent, smoothing_factor, smoothing_curve);
                 } else {
                     llama_sample_temp(ctx_main, &cur_p, temp);
                 }
diff --git a/common/sampling.h b/common/sampling.h
index e34cc3125..29f365e1e 100644
--- a/common/sampling.h
+++ b/common/sampling.h
@@ -32,6 +32,7 @@ typedef struct llama_sampling_params {
     float       dynatemp_range        = 0.00f;    // 0.0 = disabled
     float       dynatemp_exponent     = 1.00f;    // controls how entropy maps to temperature in dynamic temperature sampler
     float       smoothing_factor      = 0.0f;     // controls the quadratic adjustment in smooth sampling
+    float       smoothing_curve       = 1.0f;     // 1.0 = pure quadratic smooth sampling; other values blend in a cubic term
     int32_t     penalty_last_n        = 64;       // last n tokens to penalize (0 = disable penalty, -1 = context size)
     float       penalty_repeat        = 1.00f;    // 1.0 = disabled
     float       penalty_freq          = 0.00f;    // 0.0 = disabled
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index d79065ca1..08d515451 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -840,6 +840,7 @@ struct server_context {
         slot.sparams.dynatemp_range    = json_value(data, "dynatemp_range",    default_sparams.dynatemp_range);
         slot.sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
         slot.sparams.smoothing_factor  = json_value(data, "smoothing_factor",  default_sparams.smoothing_factor);
+        slot.sparams.smoothing_curve   = json_value(data, "smoothing_curve",   default_sparams.smoothing_curve);
         slot.sparams.penalty_last_n    = json_value(data, "repeat_last_n",     default_sparams.penalty_last_n);
         slot.sparams.penalty_repeat    = json_value(data, "repeat_penalty",    default_sparams.penalty_repeat);
         slot.sparams.penalty_freq      = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
diff --git a/llama.cpp b/llama.cpp
index e4091bd16..032cf234f 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -12183,7 +12183,7 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
     }
 }
 
-void llama_sample_entropy(struct llama_context* ctx, llama_token_data_array* candidates_p, float min_temp, float max_temp, float exponent_val, float smoothing_factor) {
+void llama_sample_entropy(struct llama_context* ctx, llama_token_data_array* candidates_p, float min_temp, float max_temp, float exponent_val, float smoothing_factor, float smoothing_curve) {
     const int64_t t_start_sample_us = ggml_time_us();
 
     // no need to do anything if there is only one (or zero) candidates
@@ -12196,10 +12196,12 @@ void llama_sample_entropy(struct llama_context* ctx, llama_token_data_array* can
         llama_sample_softmax(ctx, candidates_p);
         float h = candidates_p->data[0].logit; // Find the maximum logit for h to be added after the transformation
 
-        // Apply quadratic transformation using the smoothing_factor
+        // Blend a quadratic and a cubic term of the shifted logit: smoothing_factor scales both, smoothing_curve sets their weights
         for (size_t i = 0; i < candidates_p->size; ++i) {
             float logit_shifted = candidates_p->data[i].logit - h;
-            candidates_p->data[i].logit = -smoothing_factor * logit_shifted * logit_shifted + h;
+            float k = (3 - smoothing_curve) / 2; // quadratic weight; equals 1 when smoothing_curve == 1
+            float s = (smoothing_curve - 1) / 2; // cubic weight; equals 0 when smoothing_curve == 1 (reduces to the previous quadratic form)
+            candidates_p->data[i].logit = -(k * smoothing_factor * logit_shifted * logit_shifted) + (s * smoothing_factor * logit_shifted * logit_shifted * logit_shifted) + h;
         }
         llama_sample_softmax(ctx, candidates_p);
     }
diff --git a/llama.h b/llama.h
index 03ccd23d7..eb69073ee 100644
--- a/llama.h
+++ b/llama.h
@@ -871,7 +871,8 @@ extern "C" {
                            float   min_temp,
                            float   max_temp,
                            float   exponent_val,
-                           float   smoothing_factor);
+                           float   smoothing_factor,
+                           float   smoothing_curve);
 
     LLAMA_API void llama_sample_temp(
             struct llama_context * ctx,