Cubic "smoothing curve" support
This commit is contained in:
parent
b5dbcf6f5e
commit
a4e54abe6f
5 changed files with 11 additions and 5 deletions
|
@ -133,6 +133,7 @@ static void sampler_queue(
|
|||
const float dynatemp_range = params.dynatemp_range;
|
||||
const float dynatemp_exponent = params.dynatemp_exponent;
|
||||
const float smoothing_factor = params.smoothing_factor;
|
||||
const float smoothing_curve = params.smoothing_curve;
|
||||
const int32_t top_k = params.top_k;
|
||||
const float top_p = params.top_p;
|
||||
const float min_p = params.min_p;
|
||||
|
@ -151,7 +152,7 @@ static void sampler_queue(
|
|||
if (dynatemp_range > 0 || smoothing_factor > 0) {
|
||||
float dynatemp_min = std::max(0.0f, temp - dynatemp_range);
|
||||
float dynatemp_max = std::max(0.0f, temp + dynatemp_range);
|
||||
llama_sample_entropy(ctx_main, &cur_p, dynatemp_min, dynatemp_max, dynatemp_exponent, smoothing_factor);
|
||||
llama_sample_entropy(ctx_main, &cur_p, dynatemp_min, dynatemp_max, dynatemp_exponent, smoothing_factor, smoothing_curve);
|
||||
} else {
|
||||
llama_sample_temp(ctx_main, &cur_p, temp);
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ typedef struct llama_sampling_params {
|
|||
float dynatemp_range = 0.00f; // 0.0 = disabled
|
||||
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
|
||||
float smoothing_factor = 0.0f; // controls the quadratic adjustment in smooth sampling
|
||||
float smoothing_curve = 1.0f; // blends a cubic term into the smooth sampling transformation (1.0 = purely quadratic, i.e. no cubic term)
|
||||
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
||||
float penalty_repeat = 1.00f; // 1.0 = disabled
|
||||
float penalty_freq = 0.00f; // 0.0 = disabled
|
||||
|
|
|
@ -840,6 +840,7 @@ struct server_context {
|
|||
slot.sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
|
||||
slot.sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
|
||||
slot.sparams.smoothing_factor = json_value(data, "smoothing_factor", default_sparams.smoothing_factor);
|
||||
slot.sparams.smoothing_curve = json_value(data, "smoothing_curve", default_sparams.smoothing_curve);
|
||||
slot.sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
|
||||
slot.sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat);
|
||||
slot.sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
|
||||
|
|
|
@ -12183,7 +12183,7 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
|
|||
}
|
||||
}
|
||||
|
||||
void llama_sample_entropy(struct llama_context* ctx, llama_token_data_array* candidates_p, float min_temp, float max_temp, float exponent_val, float smoothing_factor) {
|
||||
void llama_sample_entropy(struct llama_context* ctx, llama_token_data_array* candidates_p, float min_temp, float max_temp, float exponent_val, float smoothing_factor, float smoothing_curve) {
|
||||
const int64_t t_start_sample_us = ggml_time_us();
|
||||
|
||||
// no need to do anything if there is only one (or zero) candidates
|
||||
|
@ -12196,10 +12196,12 @@ void llama_sample_entropy(struct llama_context* ctx, llama_token_data_array* can
|
|||
llama_sample_softmax(ctx, candidates_p);
|
||||
float h = candidates_p->data[0].logit; // Find the maximum logit for h to be added after the transformation
|
||||
|
||||
// Apply quadratic transformation using the smoothing_factor
|
||||
// Apply the quadratic-plus-cubic transformation: smoothing_factor scales both terms, smoothing_curve sets their mix (1.0 = purely quadratic)
|
||||
for (size_t i = 0; i < candidates_p->size; ++i) {
|
||||
float logit_shifted = candidates_p->data[i].logit - h;
|
||||
candidates_p->data[i].logit = -smoothing_factor * logit_shifted * logit_shifted + h;
|
||||
float k = (3 - smoothing_curve) / 2;
|
||||
float s = (smoothing_curve - 1) / 2;
|
||||
candidates_p->data[i].logit = -(k * smoothing_factor * logit_shifted * logit_shifted) + (s * smoothing_factor * logit_shifted * logit_shifted * logit_shifted) + h;
|
||||
}
|
||||
llama_sample_softmax(ctx, candidates_p);
|
||||
}
|
||||
|
|
3
llama.h
3
llama.h
|
@ -871,7 +871,8 @@ extern "C" {
|
|||
float min_temp,
|
||||
float max_temp,
|
||||
float exponent_val,
|
||||
float smoothing_factor);
|
||||
float smoothing_factor,
|
||||
float smoothing_curve);
|
||||
|
||||
LLAMA_API void llama_sample_temp(
|
||||
struct llama_context * ctx,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue