Use --reasoning-format, remove forced thinking for now

This commit is contained in:
ochafik 2025-02-08 17:58:33 +00:00
parent cc2c712cf9
commit c0f972bb45
10 changed files with 184 additions and 246 deletions

View file

@ -4055,7 +4055,7 @@ int main(int argc, char ** argv) {
}
auto body = json::parse(req.body);
json data = oaicompat_completion_params_parse(body, params.use_jinja, params.think, ctx_server.chat_templates);
json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates);
return handle_completions_impl(
SERVER_TASK_TYPE_COMPLETION,
@ -4068,7 +4068,7 @@ int main(int argc, char ** argv) {
// same with handle_chat_completions, but without inference part
const auto handle_apply_template = [&ctx_server, &params, &res_ok](const httplib::Request & req, httplib::Response & res) {
auto body = json::parse(req.body);
json data = oaicompat_completion_params_parse(body, params.use_jinja, params.think, ctx_server.chat_templates);
json data = oaicompat_completion_params_parse(body, params.use_jinja, params.reasoning_format, ctx_server.chat_templates);
res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
};