minja: update chat template goldens with Llama-3.1 arguments workaround

2024-09-26 18:10:27 +01:00 · 2024-09-26 18:10:27 +01:00 · 296331bba3
commit 296331bba3
parent 9cfe4d7202
10 changed files with 168 additions and 27 deletions
--- a/tests/chat/contexts/tool_use.json
+++ b/tests/chat/contexts/tool_use.json
@ -12,7 +12,7 @@
          "id": "call_1",
          "type": "function",
          "function": {
-            "arguments": {"code": "print('Hello, World!')"},
+            "arguments": "{\"code\": \"print('Hello, World!')\"}",
            "name": "ipython"
          }
        }
@ -39,7 +39,7 @@
          "id": "call_2",
          "type": "function",
          "function": {
-            "arguments": {"condition":true},
+            "arguments": "{\"condition\":true}",
            "name": "test"
          }
        }
@ -66,7 +66,7 @@
          "id": "call_3",
          "type": "function",
          "function": {
-            "arguments": {"query": "what is truth anyway am I right?"},
+            "arguments": "{\"query\": \"what is truth anyway am I right?\"}",
            "name": "brave_search"
          }
        }
--- a/tests/chat/goldens/CohereForAI-c4ai-command-r-plus-tool_use-tool_use.txt
+++ b/tests/chat/goldens/CohereForAI-c4ai-command-r-plus-tool_use-tool_use.txt
@ -59,9 +59,7 @@ Action:
 [
    {
        "tool_name": "ipython",
-        "parameters": {
+        "parameters": "{\"code\": \"print('Hello, World!')\"}"
            "code": "print('Hello, World!')"
        }
    }
 ]```
 <|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>
@ -71,9 +69,7 @@ Action:
 [
    {
        "tool_name": "test",
-        "parameters": {
+        "parameters": "{\"condition\":true}"
            "condition": true
        }
    }
 ]```
 <|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>
@ -83,9 +79,7 @@ Action:
 [
    {
        "tool_name": "brave_search",
-        "parameters": {
+        "parameters": "{\"query\": \"what is truth anyway am I right?\"}"
            "query": "what is truth anyway am I right?"
        }
    }
 ]```
 <|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|><results>
--- a/tests/chat/goldens/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use-tool_use.txt
+++ b/tests/chat/goldens/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use-tool_use.txt
@ -35,7 +35,7 @@ Anything else?<|im_end|>
 Test a tautology.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "test", "arguments": {"condition": true}}
+{"name": "test", "arguments": {"condition":true}}
 </tool_call><|im_end|>
 <|im_start|>tool
 <tool_response>
--- a/tests/chat/goldens/NousResearch-Hermes-2-Pro-Mistral-7B-tool_use-tool_use.txt
+++ b/tests/chat/goldens/NousResearch-Hermes-2-Pro-Mistral-7B-tool_use-tool_use.txt
@ -35,7 +35,7 @@ Anything else?<|im_end|>
 Test a tautology.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "test", "arguments": {"condition": true}}
+{"name": "test", "arguments": {"condition":true}}
 </tool_call><|im_end|>
 <|im_start|>tool
 <tool_response>
--- a/tests/chat/goldens/NousResearch-Hermes-3-Llama-3.1-70B-tool_use-tool_use.txt
+++ b/tests/chat/goldens/NousResearch-Hermes-3-Llama-3.1-70B-tool_use-tool_use.txt
@ -35,7 +35,7 @@ Anything else?<|im_end|>
 Test a tautology.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "test", "arguments": {"condition": true}}
+{"name": "test", "arguments": {"condition":true}}
 </tool_call><|im_end|>
 <|im_start|>tool
 <tool_response>
--- a/tests/chat/goldens/Qwen-Qwen2.5-7B-Instruct-tool_use.txt
+++ b/tests/chat/goldens/Qwen-Qwen2.5-7B-Instruct-tool_use.txt
@ -21,7 +21,7 @@ For each function call, return a json object with function name and arguments wi
 Print a hello world message with python.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "ipython", "arguments": {"code": "print('Hello, World!')"}}
+{"name": "ipython", "arguments": "{\"code\": \"print('Hello, World!')\"}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
@ -33,7 +33,7 @@ Anything else?<|im_end|>
 Test a tautology.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "test", "arguments": {"condition": true}}
+{"name": "test", "arguments": "{\"condition\":true}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
@ -45,7 +45,7 @@ Truth is definitely true.<|im_end|>
 Check it on the web.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "brave_search", "arguments": {"query": "what is truth anyway am I right?"}}
+{"name": "brave_search", "arguments": "{\"query\": \"what is truth anyway am I right?\"}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
--- a/tests/chat/goldens/Qwen-Qwen2.5-Math-7B-Instruct-tool_use.txt
+++ b/tests/chat/goldens/Qwen-Qwen2.5-Math-7B-Instruct-tool_use.txt
@ -21,7 +21,7 @@ For each function call, return a json object with function name and arguments wi
 Print a hello world message with python.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "ipython", "arguments": {"code": "print('Hello, World!')"}}
+{"name": "ipython", "arguments": "{\"code\": \"print('Hello, World!')\"}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
@ -33,7 +33,7 @@ Anything else?<|im_end|>
 Test a tautology.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "test", "arguments": {"condition": true}}
+{"name": "test", "arguments": "{\"condition\":true}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
@ -45,7 +45,7 @@ Truth is definitely true.<|im_end|>
 Check it on the web.<|im_end|>
 <|im_start|>assistant
 <tool_call>
-{"name": "brave_search", "arguments": {"query": "what is truth anyway am I right?"}}
+{"name": "brave_search", "arguments": "{\"query\": \"what is truth anyway am I right?\"}"}
 </tool_call><|im_end|>
 <|im_start|>user
 <tool_response>
--- a/tests/chat/goldens/meetkai-functionary-medium-v3.1-tool_use.txt
+++ b/tests/chat/goldens/meetkai-functionary-medium-v3.1-tool_use.txt
@ -1 +1,66 @@
-ERROR: can only concatenate str (not "dict") to str
+<|startoftext|><|start_header_id|>system<|end_header_id|>
 Cutting Knowledge Date: December 2023
 You have access to the following functions:
 Use the function 'ipython' to 'Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.'
 {&#34;name&#34;: &#34;ipython&#34;, &#34;description&#34;: &#34;Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.&#34;, &#34;parameters&#34;: {&#34;type&#34;: &#34;object&#34;, &#34;properties&#34;: {&#34;code&#34;: {&#34;type&#34;: &#34;string&#34;, &#34;description&#34;: &#34;The code to run in the ipython interpreter.&#34;}}, &#34;required&#34;: [&#34;code&#34;]}}
 Use the function 'brave_search' to 'Executes a web search with Brave.'
 {&#34;name&#34;: &#34;brave_search&#34;, &#34;description&#34;: &#34;Executes a web search with Brave.&#34;, &#34;parameters&#34;: {&#34;type&#34;: &#34;object&#34;, &#34;properties&#34;: {&#34;query&#34;: {&#34;type&#34;: &#34;string&#34;, &#34;description&#34;: &#34;The query to search for.&#34;}}, &#34;required&#34;: [&#34;query&#34;]}}
 Use the function 'wolfram_alpha' to 'Executes a query with Wolfram Alpha.'
 {&#34;name&#34;: &#34;wolfram_alpha&#34;, &#34;description&#34;: &#34;Executes a query with Wolfram Alpha.&#34;, &#34;parameters&#34;: {&#34;type&#34;: &#34;object&#34;, &#34;properties&#34;: {&#34;query&#34;: {&#34;type&#34;: &#34;string&#34;, &#34;description&#34;: &#34;The query to execute.&#34;}}, &#34;required&#34;: [&#34;query&#34;]}}
 Use the function 'test' to 'Runs a test.'
 {&#34;name&#34;: &#34;test&#34;, &#34;description&#34;: &#34;Runs a test.&#34;, &#34;parameters&#34;: {&#34;type&#34;: &#34;object&#34;, &#34;properties&#34;: {&#34;condition&#34;: {&#34;type&#34;: &#34;boolean&#34;, &#34;description&#34;: &#34;The condition to test.&#34;}}, &#34;required&#34;: [&#34;condition&#34;]}}
 Think very carefully before calling functions.
 If a you choose to call a function ONLY reply in the following format:
 <{start_tag}={function_name}>{parameters}{end_tag}
 where
 start_tag => `<function`
 parameters => a JSON dict with the function argument name as key and function argument value as value.
 end_tag => `</function>`
 Here is an example,
 <function=example_function_name>{"example_name": "example_value"}</function>
 Reminder:
 - If looking for real time information use relevant functions before falling back to brave_search
 - Function calls MUST follow the specified format, start with <function= and end with </function>
 - Required parameters MUST be specified
 - Only call one function at a time
 - Put the entire function call reply on one line
 <|eot_id|><|start_header_id|>user<|end_header_id|>
 Print a hello world message with python.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 <function=ipython>{"code": "print('Hello, World!')"}</function><|eom_id|><|start_header_id|>ipython<|end_header_id|>
 {"stdout": "Hello, World!"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 Anything else?<|eot_id|><|start_header_id|>user<|end_header_id|>
 Test a tautology.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 <function=test>{"condition":true}</function><|eom_id|><|start_header_id|>ipython<|end_header_id|>
 true<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 Truth is definitely true.<|eot_id|><|start_header_id|>user<|end_header_id|>
 Check it on the web.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 <function=brave_search>{"query": "what is truth anyway am I right?"}</function><|eom_id|><|start_header_id|>ipython<|end_header_id|>
 {"title":"Truth: don't ask the web, ask an LLM instead!","url":"https://en.wikipedia.org/wiki/Truth"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 I don't need the web to answer you but I did check, as you asked. What now?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
--- a/tests/chat/goldens/meetkai-functionary-medium-v3.2-tool_use.txt
+++ b/tests/chat/goldens/meetkai-functionary-medium-v3.2-tool_use.txt
@ -1 +1,70 @@
-ERROR: can only concatenate str (not "dict") to str
+<|startoftext|><|start_header_id|>system<|end_header_id|>
 You are capable of executing available function(s) if required.
 Only execute function(s) when absolutely necessary.
 Ask for the required input to:recipient==all
 Use JSON for function arguments.
 Respond in this format:
 >>>${recipient}
 ${content}
 Available functions:
 // Supported function definitions that should be called when necessary.
 namespace functions {
 // Runs code in an ipython interpreter and returns the result of the execution after 60 seconds.
 type ipython = (_: {
 // The code to run in the ipython interpreter.
 code: string,
 }) => any;
 // Executes a web search with Brave.
 type brave_search = (_: {
 // The query to search for.
 query: string,
 }) => any;
 // Executes a query with Wolfram Alpha.
 type wolfram_alpha = (_: {
 // The query to execute.
 query: string,
 }) => any;
 // Runs a test.
 type test = (_: {
 // The condition to test.
 condition: boolean,
 }) => any;
 } // namespace functions<|eot_id|><|start_header_id|>user<|end_header_id|>
 Print a hello world message with python.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 >>>ipython
 {"code": "print('Hello, World!')"}<|eot_id|><|start_header_id|>tool<|end_header_id|>
 {"stdout": "Hello, World!"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 >>>all
 Anything else?<|eot_id|><|start_header_id|>user<|end_header_id|>
 Test a tautology.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 >>>test
 {"condition":true}<|eot_id|><|start_header_id|>tool<|end_header_id|>
 true<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 >>>all
 Truth is definitely true.<|eot_id|><|start_header_id|>user<|end_header_id|>
 Check it on the web.<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 >>>brave_search
 {"query": "what is truth anyway am I right?"}<|eot_id|><|start_header_id|>tool<|end_header_id|>
 {"title":"Truth: don't ask the web, ask an LLM instead!","url":"https://en.wikipedia.org/wiki/Truth"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 >>>all
 I don't need the web to answer you but I did check, as you asked. What now?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
 >>>
--- a/tests/update_jinja_goldens.py
+++ b/tests/update_jinja_goldens.py
@ -26,7 +26,7 @@ import jinja2.ext
 import re
 # import requests
-logging.basicConfig(level=logging.INFO)
+logging.basicConfig(level=logging.INFO, format='%(message)s')
 logger = logging.getLogger(__name__)
 model_ids = [
@ -85,11 +85,11 @@ def strftime_now(format):
 def handle_chat_template(model_id, variant, template_src):
-    logger.info(f"# {model_id} @ {variant}")
+    logger.info(f"# {model_id}{' @ ' + variant if variant else ''}")
    model_name = model_id.replace("/", "-")
    base_name = f'{model_name}-{variant}' if variant else model_name
    template_file = f'tests/chat/templates/{base_name}.jinja'
-    logger.info(f'template_file: {template_file}')
+    logger.info(f'- template_file: {template_file}')
    with open(template_file, 'w') as f:
        f.write(template_src)
@ -125,8 +125,20 @@ def handle_chat_template(model_id, variant, template_src):
        output_file = f'tests/chat/goldens/{base_name}-{context_name}.txt'
        logger.info(f"- {output_file}")
        # The template (and workarounds) may modify the context in place, so we need to make a copy of it.
        actual_context = json.loads(json.dumps(context))
        # Work around Llama-3.1 template quirk: it expects tool_call.function.arguments to be an object rather than its JSON string representation.
        if 'tool_call.arguments | items' in template_src:
            for message in actual_context['messages']:
                if 'tool_calls' in message:
                    for tool_call in message['tool_calls']:
                        arguments = tool_call['function']['arguments']
                        tool_call['function']['arguments'] = json.loads(arguments)
        try:
-            output = template.render(**context)
+            output = template.render(**actual_context)
        except Exception as e1:
            # Some templates (e.g. Phi-3-medium-128k's) expect a non-null "content" key in each message.
            for message in context["messages"]:
@ -142,6 +154,7 @@ def handle_chat_template(model_id, variant, template_src):
        with open(output_file, 'w') as f:
            f.write(output)
    logger.info('')
 def main():
    for dir in ['tests/chat/templates', 'tests/chat/goldens']: