Add --think flag to force any model to return reasoning_content (or just parse <think> tags for DeepSeek R1)

This commit is contained in:
ochafik 2025-02-05 12:16:37 +00:00
parent 5d60cebbcc
commit 9d7c3cc51b
9 changed files with 306 additions and 145 deletions

View file

@ -578,6 +578,7 @@ static json oaicompat_completion_params_parse(const json & body) {
static json oaicompat_completion_params_parse(
const json & body, /* openai api json semantics */
bool use_jinja,
bool think,
const common_chat_templates & chat_templates)
{
json llama_params;
@ -633,9 +634,10 @@ static json oaicompat_completion_params_parse(
throw std::runtime_error("Cannot use custom grammar constraints with tools.");
}
common_chat_inputs inputs;
inputs.messages = body.at("messages");
inputs.tools = tools;
inputs.tool_choice = tool_choice;
inputs.think = think;
inputs.messages = body.at("messages");
inputs.tools = tools;
inputs.tool_choice = tool_choice;
inputs.parallel_tool_calls = json_value(body, "parallel_tool_calls", false);
if (inputs.parallel_tool_calls && !tmpl.original_caps().supports_parallel_tool_calls) {
LOG_DBG("Disabling parallel_tool_calls because the template does not support it\n");