From a2c94ae5bef97d23ba1ca6c7c149ded1ac8214ce Mon Sep 17 00:00:00 2001 From: l3utterfly Date: Fri, 19 Jan 2024 10:05:59 +0900 Subject: [PATCH] exposed exponent_val in dynamic temp sampler --- common/sampling.cpp | 3 ++- common/sampling.h | 1 + llama.cpp | 4 +--- llama.h | 3 ++- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/common/sampling.cpp b/common/sampling.cpp index 6c4528dd7..88b703127 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -130,6 +130,7 @@ static void sampler_queue( const float temp = params.temp; const float dynatemp_range = params.dynatemp_range; + const float dynatemp_exponent = params.dynatemp_exponent; const int32_t top_k = params.top_k <= 0 ? n_vocab : params.top_k; const float top_p = params.top_p; const float min_p = params.min_p; @@ -154,7 +155,7 @@ static void sampler_queue( dynatemp_min = dynatemp_min<0?0:dynatemp_min; dynatemp_max = dynatemp_max<0?0:dynatemp_max; - llama_sample_entropy(ctx_main, &cur_p, dynatemp_min, dynatemp_max); + llama_sample_entropy(ctx_main, &cur_p, dynatemp_min, dynatemp_max, dynatemp_exponent); } else { diff --git a/common/sampling.h b/common/sampling.h index 78ced2c5f..88899c094 100644 --- a/common/sampling.h +++ b/common/sampling.h @@ -19,6 +19,7 @@ typedef struct llama_sampling_params { float typical_p = 1.00f; // 1.0 = disabled float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities float dynatemp_range = 0.00f; // 0.0 = disabled + float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) float penalty_repeat = 1.10f; // 1.0 = disabled float penalty_freq = 0.00f; // 0.0 = disabled diff --git a/llama.cpp b/llama.cpp index 4e70fbd6d..3e5c1ff21 100644 --- a/llama.cpp +++ b/llama.cpp @@ -7783,13 +7783,11 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c } } -void llama_sample_entropy(struct llama_context * ctx, llama_token_data_array * candidates_p, float min_temp = 0, float max_temp = 2.0f) { +void llama_sample_entropy(struct llama_context * ctx, llama_token_data_array * candidates_p, float min_temp, float max_temp, float exponent_val) { const int64_t t_start_sample_us = ggml_time_us(); llama_sample_softmax(ctx, candidates_p); - float exponent_val = 1.0f; - // Calculate entropy of the softmax probabilities float entropy = 0.0f; for (size_t i = 0; i < candidates_p->size; ++i) { diff --git a/llama.h b/llama.h index 53f2f92ba..d6688810b 100644 --- a/llama.h +++ b/llama.h @@ -779,7 +779,8 @@ extern "C" { struct llama_context * ctx, llama_token_data_array * candidates_p, float min_temp, - float max_temp); + float max_temp, + float exponent_val); LLAMA_API void llama_sample_temp( struct llama_context * ctx,