tool-call: support Command R7B (+ return tool_plan "thoughts" in API) (#11585)

* `tool-call`: support Command R7B (w/ tool_plan return)

* `tool-call`: cleaner preservation of tokens + warn when likely bad chat template override

* `tool-call`: test cleanup / handle lazy grammar triggers
This commit is contained in:
Olivier Chafik 2025-02-02 09:25:38 +00:00 committed by GitHub
parent 69804487e0
commit bfcce4d693
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 420 additions and 56 deletions

View file

@ -4,6 +4,7 @@
#include "llama-cpp.h"
#include <set>
#include <string>
#include <vector>
#include <sstream>
@ -163,6 +164,7 @@ struct common_params_sampling {
bool grammar_lazy = false; // enables lazy grammar (see the trigger words/tokens below); off by default
std::vector<common_grammar_trigger> grammar_trigger_words; // optional trigger words to trigger lazy grammar
std::vector<llama_token> grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar and print trigger special tokens.
std::set<llama_token> preserved_tokens; // NOTE(review): presumably the set of token ids that must be preserved (kept as-is) in output — confirm exact semantics where this is consumed
std::vector<llama_logit_bias> logit_bias; // logit biases to apply
@ -621,6 +623,7 @@ struct common_chat_msg {
std::string role; // NOTE(review): presumably the chat role of the message author (e.g. "user" / "assistant") — confirm against callers
std::string content; // message content
std::vector<common_tool_call> tool_calls; // tool calls carried by this message
std::string tool_plan = ""; // "thoughts" text surfaced as tool_plan in the API (added with Command R7B support); empty by default
};
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid