diff --git a/common/sampling.cpp b/common/sampling.cpp index 6c4e6bc1f..bd4f34b9e 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -191,9 +191,9 @@ llama_token llama_sampling_sample( llama_sample_tail_free(ctx_main, &cur_p, tfs_z, min_keep); llama_sample_typical (ctx_main, &cur_p, typical_p, min_keep); if (min_p != 0.0) { - llama_sample_min_p(ctx_main, &cur_p, min_p, min_keep); + llama_sample_min_p(ctx_main, &cur_p, min_p, min_keep); } else { - llama_sample_top_p(ctx_main, &cur_p, top_p, min_keep); + llama_sample_top_p(ctx_main, &cur_p, top_p, min_keep); } llama_sample_temp (ctx_main, &cur_p, temp); diff --git a/common/sampling.h b/common/sampling.h index 97f2c170a..84051d62a 100644 --- a/common/sampling.h +++ b/common/sampling.h @@ -14,7 +14,7 @@ typedef struct llama_sampling_params { int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. int32_t top_k = 40; // <= 0 to use vocab size float top_p = 0.95f; // 1.0 = disabled - float min_p = 0.0f; // 1.0 (or 0.0) = disabled + float min_p = 0.00f; // 0.0 = disabled float tfs_z = 1.00f; // 1.0 = disabled float typical_p = 1.00f; // 1.0 = disabled float temp = 0.80f; // 1.0 = disabled