Tool call support (generic + native for Llama, Functionary, Hermes, Mistral, Firefunction, DeepSeek) w/ lazy grammars (#9639)

--------- Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
2025-01-30 19:13:58 +00:00 · 2025-01-30 19:13:58 +00:00 · 8b576b6c55
commit 8b576b6c55
parent 27d135c970
48 changed files with 3861 additions and 156 deletions
--- a/common/common.h
+++ b/common/common.h
@ -109,6 +109,11 @@ enum common_conversation_mode {
    COMMON_CONVERSATION_MODE_AUTO     = 2,
 };

+// a trigger word for a lazy grammar (stored in common_params_sampling::grammar_trigger_words)
+struct common_grammar_trigger {
+    // the text of the trigger word
+    std::string word;
+    // NOTE(review): presumably restricts the trigger to the very start of the output - confirm against the sampler code
+    // no default initializer here: a default-constructed trigger leaves this value indeterminate
+    bool at_start;
+};
+
 // sampling parameters
 struct common_params_sampling {
    uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
@ -154,7 +159,10 @@ struct common_params_sampling {
        COMMON_SAMPLER_TYPE_TEMPERATURE,
    };

-    std::string grammar; // optional BNF-like grammar to constrain sampling
+    std::string                         grammar; // optional BNF-like grammar to constrain sampling
+    bool                                grammar_lazy = false;
+    std::vector<common_grammar_trigger> grammar_trigger_words;  // optional trigger words to trigger lazy grammar
+    std::vector<llama_token>            grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar and print trigger special tokens.

    std::vector<llama_logit_bias> logit_bias; // logit biases to apply

@ -602,10 +610,17 @@ std::string common_detokenize(
 // Chat template utils
 //

+// a single tool call; a list of these is carried by common_chat_msg::tool_calls
+struct common_tool_call {
+    // NOTE(review): presumably the name of the tool/function being called - confirm
+    std::string name;
+    // NOTE(review): presumably the call arguments in serialized (JSON?) form - confirm the expected encoding
+    std::string arguments;
+    // NOTE(review): presumably an identifier for this particular call - confirm whether it may be empty
+    std::string id;
+};
+
 // same with llama_chat_message, but uses std::string
 struct common_chat_msg {
    std::string role;
    std::string content;
+    // tool calls attached to this message; default-constructed empty
+    std::vector<common_tool_call> tool_calls;
 };

 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid