sampling : refactor + optimize penalties sampler (#10803)
* sampling : refactor + optimize penalties sampler ggml-ci * common : apply ignore_eos as logit bias ggml-ci * batched : remove penalties sampler * params : allow penalty_last_n == -1 to be equal to context size ggml-ci * common : by default, move the penalties at the end of the sampling chain ggml-ci * common : ignore all EOG tokens Co-authored-by: Diego Devesa <slarengh@gmail.com> * common : move back the penalties at the front of the sampling chain ggml-ci * readme : restore hint about --ignore-eos flag [no ci] * llama : minor ggml-ci * webui : update --------- Co-authored-by: Diego Devesa <slarengh@gmail.com>
This commit is contained in:
parent
4ddd199f6f
commit
644fd71b44
17 changed files with 111 additions and 152 deletions
|
@ -855,13 +855,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
params.sampling.ignore_eos = true;
|
||||
}
|
||||
).set_sparam());
|
||||
add_opt(common_arg(
|
||||
{"--penalize-nl"},
|
||||
string_format("penalize newline tokens (default: %s)", params.sampling.penalize_nl ? "true" : "false"),
|
||||
[](common_params & params) {
|
||||
params.sampling.penalize_nl = true;
|
||||
}
|
||||
).set_sparam());
|
||||
add_opt(common_arg(
|
||||
{"--temp"}, "N",
|
||||
string_format("temperature (default: %.1f)", (double)params.sampling.temp),
|
||||
|
@ -916,6 +909,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
{"--repeat-last-n"}, "N",
|
||||
string_format("last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)", params.sampling.penalty_last_n),
|
||||
[](common_params & params, int value) {
|
||||
if (value < -1) {
|
||||
throw std::runtime_error(string_format("error: invalid repeat-last-n = %d\n", value));
|
||||
}
|
||||
params.sampling.penalty_last_n = value;
|
||||
params.sampling.n_prev = std::max(params.sampling.n_prev, params.sampling.penalty_last_n);
|
||||
}
|
||||
|
@ -970,6 +966,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
{"--dry-penalty-last-n"}, "N",
|
||||
string_format("set DRY penalty for the last n tokens (default: %d, 0 = disable, -1 = context size)", params.sampling.dry_penalty_last_n),
|
||||
[](common_params & params, int value) {
|
||||
if (value < -1) {
|
||||
throw std::runtime_error(string_format("error: invalid dry-penalty-last-n = %d\n", value));
|
||||
}
|
||||
params.sampling.dry_penalty_last_n = value;
|
||||
}
|
||||
).set_sparam());
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue