diff --git a/common/common.cpp b/common/common.cpp
index 5ff8c579d..44893a219 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -218,12 +218,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             sparams.top_p = std::stof(argv[i]);
-        } else if (arg == "--min-p") { // Adding min_p argument
+        } else if (arg == "--min-p") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
-            sparams.min_p = std::stof(argv[i]); // Parsing and setting the min_p value from command line
+            sparams.min_p = std::stof(argv[i]);
         } else if (arg == "--temp") {
             if (++i >= argc) {
                 invalid_param = true;
                 break;
             }
@@ -685,7 +685,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch);
     printf("  --top-k N             top-k sampling (default: %d, 0 = disabled)\n", sparams.top_k);
     printf("  --top-p N             top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)sparams.top_p);
-    printf("  --min-p N             min-p sampling (default: %.2f, 0.0 = disabled)\n", (double)sparams.min_p);
+    printf("  --min-p N             min-p sampling (default: %.1f, 0.0 = disabled)\n", (double)sparams.min_p);
     printf("  --tfs N               tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)sparams.tfs_z);
     printf("  --typical N           locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)\n", (double)sparams.typical_p);
     printf("  --repeat-last-n N     last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)\n", sparams.penalty_last_n);
diff --git a/common/sampling.cpp b/common/sampling.cpp
index bd4f34b9e..673d67a6d 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -190,11 +190,8 @@ llama_token llama_sampling_sample(
             llama_sample_top_k    (ctx_main, &cur_p, top_k,     min_keep);
             llama_sample_tail_free(ctx_main, &cur_p, tfs_z,     min_keep);
             llama_sample_typical  (ctx_main, &cur_p, typical_p, min_keep);
-            if (min_p != 0.0) {
-                llama_sample_min_p(ctx_main, &cur_p, min_p, min_keep);
-            } else {
-                llama_sample_top_p(ctx_main, &cur_p, top_p, min_keep);
-            }
+            llama_sample_top_p    (ctx_main, &cur_p, top_p,     min_keep);
+            llama_sample_min_p    (ctx_main, &cur_p, min_p,     min_keep);
             llama_sample_temp     (ctx_main, &cur_p, temp);

             id = llama_sample_token(ctx_main, &cur_p);
diff --git a/llama.cpp b/llama.cpp
index 07ec721e2..1f27016c6 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7361,44 +7361,29 @@ void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * can
 }

 void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep) {
-    float base_min_p = p; // This will hold the base minimum probability value
-    float multiplied_min_p; // This will hold the adjusted minimum probability threshold
+    if (p <= 0.0f || !candidates->size) {
+        return;
+    }

-    // Ensure the probabilities are calculated.
     llama_sample_softmax(ctx, candidates);

-    // Calculate the multiplication factor based on the highest scoring token.
-    float multiplication_factor = candidates->data[0].p;
+    const int64_t t_start_sample_us = ggml_time_us();

-    // Calculate the minimum percentage requirement.
-    multiplied_min_p = base_min_p * multiplication_factor;
+    float  scale = candidates->data[0].p; // scale by max prob
+    size_t i     = 1;                     // first token always matches

-    // Store the tokens that meet the threshold in a new list.
-    std::vector<llama_token_data> filtered_candidates;
-    filtered_candidates.reserve(candidates->size); // Reserve to avoid multiple reallocations
-
-    for (size_t i = 0; i < candidates->size; ++i) {
-        // If a token's probability is above the threshold or if we haven't kept enough tokens yet
-        if (candidates->data[i].p >= multiplied_min_p || filtered_candidates.size() < min_keep) {
-            filtered_candidates.push_back(candidates->data[i]);
+    for (; i < candidates->size; ++i) {
+        if (candidates->data[i].p < p * scale && i >= min_keep) {
+            break; // prob too small
         }
     }

-    // If not enough candidates meet the threshold, take the top 'min_keep' ones
-    if (filtered_candidates.size() < min_keep) {
-        std::sort(candidates->data, candidates->data + candidates->size,
-                  [](const llama_token_data & a, const llama_token_data & b) {
-                      return a.p > b.p; // Sort by probability in descending order
-                  });
-        filtered_candidates.clear(); // Clear the previously filtered candidates
-        for (size_t i = 0; i < min_keep; ++i) {
-            filtered_candidates.push_back(candidates->data[i]);
-        }
-    }
+    // Resize the output vector to keep only the matching tokens
+    candidates->size = i;

-    // Now we replace the original candidates with the filtered list.
-    std::copy(filtered_candidates.begin(), filtered_candidates.end(), candidates->data);
-    candidates->size = filtered_candidates.size();
+    if (ctx) {
+        ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
+    }
 }

 void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep) {
diff --git a/llama.h b/llama.h
index 62addffdf..157d75575 100644
--- a/llama.h
+++ b/llama.h
@@ -600,7 +600,7 @@ extern "C" {
                            float   p,
                           size_t   min_keep);

-    /// @details Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841#issue-1966758357
+    /// @details Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
     LLAMA_API void llama_sample_min_p(
             struct llama_context * ctx,
           llama_token_data_array * candidates,
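
For reference, here is a minimal standalone sketch of the thresholding rule that the simplified `llama_sample_min_p` above implements: after softmax, the cutoff is `p` scaled by the probability of the top token, the top token always survives, and the list is never cut below `min_keep` entries. The helper `min_p_cutoff` and the `probs` vector are hypothetical names used only for illustration; they are not part of the llama.cpp API.

```cpp
// Illustration only: min-p filtering over a list of probabilities that is
// already softmax-normalized and sorted in descending order, as
// llama_sample_softmax guarantees in the patch above.
#include <cstddef>
#include <cstdio>
#include <vector>

static size_t min_p_cutoff(const std::vector<float> & probs, float min_p, size_t min_keep) {
    if (min_p <= 0.0f || probs.empty()) {
        return probs.size(); // keep everything, matching the early-out in the patch
    }

    const float threshold = min_p * probs[0]; // scale the threshold by the top probability

    size_t i = 1; // the top token always survives
    for (; i < probs.size(); ++i) {
        if (probs[i] < threshold && i >= min_keep) {
            break; // everything from here on is below the scaled threshold
        }
    }
    return i; // number of tokens to keep
}

int main() {
    // Sorted, normalized probabilities for a toy vocabulary.
    const std::vector<float> probs = {0.50f, 0.25f, 0.15f, 0.06f, 0.03f, 0.01f};

    // With min_p = 0.1 the threshold is 0.1 * 0.50 = 0.05, so four tokens survive.
    const size_t kept = min_p_cutoff(probs, 0.1f, 1);
    std::printf("kept %zu of %zu tokens\n", kept, probs.size());
    return 0;
}
```

Note that with this patch the chain in `llama_sampling_sample` always applies top-p followed by min-p instead of choosing one of the two based on `min_p != 0.0`; either filter can still be disabled through its neutral value (1.0 for top-p, 0.0 for min-p, which hits the early return in `llama_sample_min_p`).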