diff --git a/common/arg.cpp b/common/arg.cpp
index faac1db99..3d55289c3 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -909,6 +909,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--repeat-last-n"}, "N",
         string_format("last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)", params.sampling.penalty_last_n),
         [](common_params & params, int value) {
+            if (value < -1) {
+                throw std::runtime_error(string_format("error: invalid repeat-last-n = %d\n", value));
+            }
             params.sampling.penalty_last_n = value;
             params.sampling.n_prev = std::max(params.sampling.n_prev, params.sampling.penalty_last_n);
         }
@@ -963,6 +966,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         {"--dry-penalty-last-n"}, "N",
         string_format("set DRY penalty for the last n tokens (default: %d, 0 = disable, -1 = context size)", params.sampling.dry_penalty_last_n),
         [](common_params & params, int value) {
+            if (value < -1) {
+                throw std::runtime_error(string_format("error: invalid dry-penalty-last-n = %d\n", value));
+            }
             params.sampling.dry_penalty_last_n = value;
         }
     ).set_sparam());
diff --git a/common/common.cpp b/common/common.cpp
index 4f09bee3a..2846a2de3 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -945,6 +945,16 @@ struct common_init_result common_init_from_params(common_params & params) {
         params.sampling.logit_bias.push_back({llama_token_eos(model), -INFINITY});
     }
 
+    if (params.sampling.penalty_last_n == -1) {
+        LOG_INF("%s: setting penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
+        params.sampling.penalty_last_n = llama_n_ctx(lctx);
+    }
+
+    if (params.sampling.dry_penalty_last_n == -1) {
+        LOG_INF("%s: setting dry_penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
+        params.sampling.dry_penalty_last_n = llama_n_ctx(lctx);
+    }
+
     if (params.warmup) {
         LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index d8aeb094e..bc0d042ae 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -183,6 +183,7 @@ struct server_task {
 
     static slot_params params_from_json_cmpl(
             const llama_model * model,
+            const llama_context * ctx,
             const common_params & params_base,
             const json & data) {
         slot_params params;
@@ -237,8 +238,27 @@ struct server_task {
         params.speculative.n_min = std::max(params.speculative.n_min, 2);
         params.speculative.n_max = std::max(params.speculative.n_max, 0);
 
+        // TODO: add more sanity checks for the input parameters
+
+        if (params.sampling.penalty_last_n < -1) {
+            throw std::runtime_error("Error: repeat_last_n must be >= -1");
+        }
+
+        if (params.sampling.dry_penalty_last_n < -1) {
+            throw std::runtime_error("Error: dry_penalty_last_n must be >= -1");
+        }
+
+        if (params.sampling.penalty_last_n == -1) {
+            // note: should be the slot's context and not the full context, but it's ok
+            params.sampling.penalty_last_n = llama_n_ctx(ctx);
+        }
+
+        if (params.sampling.dry_penalty_last_n == -1) {
+            params.sampling.dry_penalty_last_n = llama_n_ctx(ctx);
+        }
+
         if (params.sampling.dry_base < 1.0f) {
-           params.sampling.dry_base = defaults.sampling.dry_base;
+            params.sampling.dry_base = defaults.sampling.dry_base;
         }
 
         // sequence breakers for DRY
@@ -3379,7 +3399,7 @@ int main(int argc, char ** argv) {
             task.index = i;
             task.prompt_tokens = std::move(tokenized_prompts[i]);
 
-            task.params = server_task::params_from_json_cmpl(ctx_server.model, ctx_server.params_base, data);
+            task.params = server_task::params_from_json_cmpl(ctx_server.model, ctx_server.ctx, ctx_server.params_base, data);
             task.id_selected_slot = json_value(data, "id_slot", -1);
 
             // OAI-compat
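
The patch applies the same rule in three places: values below -1 are rejected, -1 is resolved to the context size via `llama_n_ctx(...)`, and 0 (disabled) or positive values pass through unchanged. Below is a minimal standalone sketch of that normalization, not part of the patch itself; the helper `resolve_last_n` and the fixed `n_ctx` value are hypothetical stand-ins for the patched code paths and `llama_n_ctx(ctx)`:

```cpp
#include <cstdint>
#include <stdexcept>
#include <string>

// Illustrative helper (assumed, not from the patch): mirrors the
// normalization the diff applies to penalty_last_n and dry_penalty_last_n.
static int32_t resolve_last_n(int32_t value, int32_t n_ctx, const std::string & name) {
    if (value < -1) {
        // Reject out-of-range values up front, matching the new checks in
        // common/arg.cpp and examples/server/server.cpp.
        throw std::runtime_error("error: invalid " + name + " = " + std::to_string(value));
    }
    if (value == -1) {
        // -1 is shorthand for "use the full context size"; it is resolved
        // once at init time so the samplers never see the sentinel.
        return n_ctx;
    }
    // 0 (disabled) and positive values pass through unchanged.
    return value;
}

int main() {
    const int32_t n_ctx = 4096; // stand-in for llama_n_ctx(ctx)

    int32_t penalty_last_n = resolve_last_n(-1, n_ctx, "repeat-last-n");     // -> 4096
    int32_t dry_last_n     = resolve_last_n(0,  n_ctx, "dry-penalty-last-n"); // -> 0 (disabled)

    return (penalty_last_n == 4096 && dry_last_n == 0) ? 0 : 1;
}
```

Resolving the sentinel eagerly (at init time in `common_init_from_params`, and per request in `params_from_json_cmpl`) means downstream sampling code only ever sees concrete window sizes; that is also why `params_from_json_cmpl` now needs the `llama_context *` parameter threaded through from `main`.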