params : allow penalty_last_n == -1 to be equal to context size
ggml-ci
This commit is contained in:
parent
a04a5b526b
commit
9847a375f3
3 changed files with 38 additions and 2 deletions
|
@ -909,6 +909,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
{"--repeat-last-n"}, "N",
|
||||
string_format("last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)", params.sampling.penalty_last_n),
|
||||
[](common_params & params, int value) {
|
||||
if (value < -1) {
|
||||
throw std::runtime_error(string_format("error: invalid repeat-last-n = %d\n", value));
|
||||
}
|
||||
params.sampling.penalty_last_n = value;
|
||||
params.sampling.n_prev = std::max(params.sampling.n_prev, params.sampling.penalty_last_n);
|
||||
}
|
||||
|
@ -963,6 +966,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
{"--dry-penalty-last-n"}, "N",
|
||||
string_format("set DRY penalty for the last n tokens (default: %d, 0 = disable, -1 = context size)", params.sampling.dry_penalty_last_n),
|
||||
[](common_params & params, int value) {
|
||||
if (value < -1) {
|
||||
throw std::runtime_error(string_format("error: invalid dry-penalty-last-n = %d\n", value));
|
||||
}
|
||||
params.sampling.dry_penalty_last_n = value;
|
||||
}
|
||||
).set_sparam());
|
||||
|
|
|
@ -945,6 +945,16 @@ struct common_init_result common_init_from_params(common_params & params) {
|
|||
params.sampling.logit_bias.push_back({llama_token_eos(model), -INFINITY});
|
||||
}
|
||||
|
||||
if (params.sampling.penalty_last_n == -1) {
|
||||
LOG_INF("%s: setting penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
|
||||
params.sampling.penalty_last_n = llama_n_ctx(lctx);
|
||||
}
|
||||
|
||||
if (params.sampling.dry_penalty_last_n == -1) {
|
||||
LOG_INF("%s: setting dry_penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
|
||||
params.sampling.dry_penalty_last_n = llama_n_ctx(lctx);
|
||||
}
|
||||
|
||||
if (params.warmup) {
|
||||
LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
|
||||
|
||||
|
|
|
@ -183,6 +183,7 @@ struct server_task {
|
|||
|
||||
static slot_params params_from_json_cmpl(
|
||||
const llama_model * model,
|
||||
const llama_context * ctx,
|
||||
const common_params & params_base,
|
||||
const json & data) {
|
||||
slot_params params;
|
||||
|
@ -237,8 +238,27 @@ struct server_task {
|
|||
params.speculative.n_min = std::max(params.speculative.n_min, 2);
|
||||
params.speculative.n_max = std::max(params.speculative.n_max, 0);
|
||||
|
||||
// TODO: add more sanity checks for the input parameters
|
||||
|
||||
if (params.sampling.penalty_last_n < -1) {
|
||||
throw std::runtime_error("Error: repeat_last_n must be >= -1");
|
||||
}
|
||||
|
||||
if (params.sampling.dry_penalty_last_n < -1) {
|
||||
throw std::runtime_error("Error: dry_penalty_last_n must be >= -1");
|
||||
}
|
||||
|
||||
if (params.sampling.penalty_last_n == -1) {
|
||||
// note: should be the slot's context and not the full context, but it's ok
|
||||
params.sampling.penalty_last_n = llama_n_ctx(ctx);
|
||||
}
|
||||
|
||||
if (params.sampling.dry_penalty_last_n == -1) {
|
||||
params.sampling.dry_penalty_last_n = llama_n_ctx(ctx);
|
||||
}
|
||||
|
||||
if (params.sampling.dry_base < 1.0f) {
|
||||
params.sampling.dry_base = defaults.sampling.dry_base;
|
||||
params.sampling.dry_base = defaults.sampling.dry_base;
|
||||
}
|
||||
|
||||
// sequence breakers for DRY
|
||||
|
@ -3379,7 +3399,7 @@ int main(int argc, char ** argv) {
|
|||
task.index = i;
|
||||
|
||||
task.prompt_tokens = std::move(tokenized_prompts[i]);
|
||||
task.params = server_task::params_from_json_cmpl(ctx_server.model, ctx_server.params_base, data);
|
||||
task.params = server_task::params_from_json_cmpl(ctx_server.model, ctx_server.ctx, ctx_server.params_base, data);
|
||||
task.id_selected_slot = json_value(data, "id_slot", -1);
|
||||
|
||||
// OAI-compat
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue