agent: --style
parent 9ab493f67e
commit e0c8af4ba0

4 changed files with 65 additions and 36 deletions
@@ -8,7 +8,6 @@ python -m examples.agent \
     --tools examples/agent/tools/example_math_tools.py \
     --goal "What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?"
 ```
-<!-- --format float \ -->

 <details>
 <summary>Show output</summary>
@@ -37,6 +36,23 @@ python -m examples.agent \
 <summary>Show output</summary>

 ```bash
+💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
+⚙️ get_current_weather(location=San Francisco, format=fahrenheit) -> ...
+💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
+⚙️ get_n_day_weather_forecast(location=San Francisco, format=fahrenheit, num_days=4) -> ...
+💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
+⚙️ get_n_day_weather_forecast(location=Glasgow, format=celsius, num_days=4) -> ...
+The current weather in San Francisco is sunny and 87.8F. Here is the 4-day weather forecast:
+
+For San Francisco:
+- In 1 day: Cloudy, 60.8F
+- In 2 days: Sunny, 73.4F
+- In 3 days: Cloudy, 62.6F
+
+For Glasgow:
+- In 1 day: Cloudy, 16C
+- In 2 days: Sunny, 23C
+- In 3 days: Cloudy, 17C
 ```

 </details>
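As a sanity check on the math goal in the README hunk above, the chain of results the agent is expected to produce can be computed directly (2535² = 6,426,225):

```python
# Worked arithmetic for the example goal above.
total = 2535**2 + 32222000403   # 6_426_225 + 32_222_000_403 = 32_228_426_628
scaled = total * 1.5            # 48_342_639_942.0
third = scaled / 3              # 16_114_213_314.0
print(total, scaled, third)
```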
@@ -12,6 +12,7 @@ from examples.json_schema_to_grammar import SchemaConverter
 from examples.agent.tools.std_tools import StandardTools
 from examples.openai.api import ChatCompletionRequest, ChatCompletionResponse, Message, Tool, ToolFunction
 from examples.agent.utils import collect_functions, load_module
+from examples.openai.prompting import ToolsPromptStyle

 def _get_params_schema(fn: Callable, verbose):
     converter = SchemaConverter(prop_order={}, allow_fetch=False, dotall=False, raw_pattern=False)
@@ -130,6 +131,7 @@ def main(
     auth: Optional[str] = None,
     parallel_calls: Optional[bool] = True,
     verbose: bool = False,
+    style: Optional[ToolsPromptStyle] = None,

     model: Annotated[Optional[Path], typer.Option("--model", "-m")] = "models/7B/ggml-model-f16.gguf",
     endpoint: Optional[str] = None,
@@ -175,8 +177,8 @@ def main(
         "--model", model,
         *(['--verbose'] if verbose else []),
         *(['--parallel-calls'] if parallel_calls else []),
-        *(['--context-length={context_length}'] if context_length else []),
-        *([])
+        *([f'--context-length={context_length}'] if context_length else []),
+        *([f'--style={style.value}'] if style else []),
     ]
     server_process = subprocess.Popen(cmd, stdout=sys.stderr)
     atexit.register(server_process.kill)
@@ -196,7 +198,7 @@ def main(
     if std_tools:
         tool_functions.extend(collect_functions(StandardTools))

-    response_model = None#str
+    response_model = str
     if format:
         if format in types:
             response_model = types[format]
@@ -245,6 +247,7 @@ def main(
         }]
     )
     print(result if response_model else f'➡️ {result}')
+    # exit(0)

 if __name__ == '__main__':
     typer.run(main)
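Two of the agent changes above are behavior fixes rather than part of the new flag: `response_model` now defaults to `str`, so the final result prints without the `➡️` decoration, and the `--context-length` argument gains its missing `f` prefix. Without that prefix Python passes the braces through literally, so the spawned server received the text `{context_length}` instead of a number:

```python
context_length = 4096

# Before the fix: a plain string; the braces are not interpolated.
print('--context-length={context_length}')   # --context-length={context_length}

# After the fix: an f-string substitutes the value.
print(f'--context-length={context_length}')  # --context-length=4096
```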
@@ -14,45 +14,45 @@ from examples.openai.api import Tool, Message, FunctionCall, ToolCall
 from examples.openai.gguf_kvs import GGUFKeyValues, Keys
 from examples.openai.ts_converter import SchemaToTypeScriptConverter

+_THOUGHT_KEY = "thought"
+# _THOUGHT_KEY = "thought_about_next_step_only"
+
 # While the API will be usable with a generic tools usage like OpenAI,
 # (see https://cookbook.openai.com/examples/how_to_call_functions_with_chat_models),
 # each model may need specific prompting (and/or constrained output,
 # especially for models not fine-tuned for tool usage / function calling).
-class ToolsPromptStyle(Enum):
+class ToolsPromptStyle(str, Enum):
     # Short prompt w/ <tools>schemas</tools>, <tool_call>...</tool_call> output
-    TOOLS_SHORT = 1
+    TOOLS_SHORT = "short"

     # Longer prompt w/ <tools>schemas</tools>, <tool_call>...</tool_call> output
-    TOOLS_LONG = 2
+    TOOLS_LONG = "long"

     # Bespoke constrained output format that favours thought and reasoning
     # while allowing unambiguous parsing of parallel tool calling.
-    TOOLS_BESPOKE = 3
+    TOOLS_CONSTRAINED = "thoughtful_steps"

     # Large prompt for https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B
     # <tool_call>...</tool_call> output
     # Requires:
     # - git clone https://github.com/NousResearch/Hermes-Function-Calling examples/openai/hermes_function_calling
     # - Set large context length as their prompts are super long
-    TOOLS_HERMES_2_PRO = 4
+    TOOLS_HERMES_2_PRO = "tools_hermes_2_pro"

     # Seems to want to escape underscores in tool names and in the <tool\_call>...</tool\_call> tags
-    TOOLS_MISTRAL = 5
+    TOOLS_MIXTRAL = "mixtral"

     # Short prompt w/ TypeScript definitions for https://github.com/MeetKai/functionary
     # https://github.com/MeetKai/functionary/blob/main/functionary/prompt_template/prompt_template_v2.py
     # Note: see this prior attempt to support Functionary: https://github.com/ggerganov/llama.cpp/pull/5695
-    TYPESCRIPT_FUNCTIONARY_V2 = 6
+    TYPESCRIPT_FUNCTIONARY_V2 = "functionary_v2"

 def raise_exception(msg: str):
     raise Exception(msg)

 class ChatTemplate(BaseModel):
     template: str
-
-    @property
-    def tool_style(self) -> 'ToolsPromptStyle':
-        return self._tool_style
+    inferred_tool_style: Optional['ToolsPromptStyle'] = None

     def __init__(self, template: str, eos_token: str, bos_token: str):
         super().__init__(template=template
@@ -65,12 +65,12 @@ class ChatTemplate(BaseModel):
         self._strict_user_assistant_alternation = "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception" in template

         if "<|recipient|>' + tool_call['function']['name']" in template:
-            self._tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
+            self.inferred_tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
         else:
-            self._tool_style = ToolsPromptStyle.TOOLS_BESPOKE
-            # self._tool_style = ToolsPromptStyle.TOOLS_LONG
-            # self._tool_style = ToolsPromptStyle.TOOLS_HERMES_2_PRO
-            # self._tool_style = ToolsPromptStyle.TOOLS_MISTRAL
+            self.inferred_tool_style = ToolsPromptStyle.TOOLS_CONSTRAINED
+            # self.inferred_tool_style = ToolsPromptStyle.TOOLS_LONG
+            # self.inferred_tool_style = ToolsPromptStyle.TOOLS_HERMES_2_PRO
+            # self.inferred_tool_style = ToolsPromptStyle.TOOLS_MIXTRAL

         # TODO: Test whether the template supports formatting tool_calls
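The move from `ToolsPromptStyle(Enum)` with integer values to `ToolsPromptStyle(str, Enum)` with string values is what makes the enum usable as a CLI flag and in f-strings like `--style={style.value}`. A minimal illustration of the str-mixin behavior (the two-member `Style` enum is just for demonstration):

```python
from enum import Enum

class Style(str, Enum):     # same pattern as ToolsPromptStyle(str, Enum)
    SHORT = "short"
    LONG = "long"

assert Style.SHORT == "short"                             # compares as a plain string
assert Style("long") is Style.LONG                        # round-trips from user input
assert f"--style={Style.SHORT.value}" == "--style=short"  # serializes cleanly
```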
@@ -399,7 +399,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema, parallel_calls):
         "type": "object",
         "properties": {
             # "original_goal": {"title": "Original Goal", "type": "string"},
-            "thought_about_next_step_only": {
+            _THOUGHT_KEY: {
                 "title": "Thought about next step",
                 # "title": "Thought about how the next step brings us closer to achieving the original goal",
                 "type": "string"
@@ -430,7 +430,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema, parallel_calls):
                 ]
             },
         },
-        "required": ["original_goal", "thought_about_next_step_only", "next_step"]
+        "required": ["original_goal", _THOUGHT_KEY, "next_step"]
         # "required": ["next_step"]
     }
@@ -505,7 +505,7 @@ class BespokeToolsChatHandler(ChatHandler):
         elif 'tool_calls' in next_step:
             return Message(
                 role="assistant",
-                content=data["thought_about_next_step_only"] if "thought_about_next_step_only" in data else None,
+                content=data.get(_THOUGHT_KEY),
                 tool_calls=[
                     ToolCall(id=gen_callid(), function=FunctionCall(**tc))
                     for tc in next_step['tool_calls']
@@ -539,20 +539,28 @@ _LONG_TEMPLATE='\n'.join([
     # 'This is not hypothetical, you're not asked what you would do. If you need a tool called, just call it with <tool_call>...</tool_call>.''',
 ])

-def get_chat_handler(args: ChatHandlerArgs, parallel_calls: bool) -> ChatHandler:
+def get_chat_handler(args: ChatHandlerArgs, parallel_calls: bool, tool_style: Optional[ToolsPromptStyle] = None) -> ChatHandler:
+    tool_style = tool_style or args.chat_template.inferred_tool_style
+
     if not args.tools:
         return NoToolsChatHandler(args)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:
-        return FunctionaryToolsChatHandler(args, parallel_calls=parallel_calls)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_SHORT:
-        return TemplatedToolsChatHandler(args, _SHORT_TEMPLATE, parallel_calls=parallel_calls)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_LONG:
-        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_MISTRAL:
-        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls, escapes_underscores=True)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_BESPOKE:
+    elif tool_style == ToolsPromptStyle.TOOLS_CONSTRAINED:
         return BespokeToolsChatHandler(args, parallel_calls=parallel_calls)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO:
+
+    elif tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:
+        return FunctionaryToolsChatHandler(args, parallel_calls=parallel_calls)
+
+    elif tool_style == ToolsPromptStyle.TOOLS_SHORT:
+        return TemplatedToolsChatHandler(args, _SHORT_TEMPLATE, parallel_calls=parallel_calls)
+
+    elif tool_style == ToolsPromptStyle.TOOLS_LONG:
+        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls)
+
+    elif tool_style == ToolsPromptStyle.TOOLS_MIXTRAL:
+        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls, escapes_underscores=True)
+
+    elif tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO:
         return Hermes2ProToolsChatHandler(args)
     else:
         raise ValueError(f"Unsupported tool call style: {args.chat_template.tool_style}")
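`get_chat_handler` now resolves the style in one place: an explicit `tool_style` argument wins, otherwise it falls back to the style inferred from the chat template. (One leftover wrinkle: the final `raise` still formats `args.chat_template.tool_style`, the property this commit deletes, so the unsupported-style branch would itself fail with an `AttributeError`.) The fallback idiom in isolation, with a hypothetical `Template` standing in for `ChatTemplate`:

```python
from typing import Optional

class Template:  # hypothetical stand-in for ChatTemplate
    def __init__(self, inferred_tool_style: Optional[str] = "thoughtful_steps"):
        self.inferred_tool_style = inferred_tool_style

def resolve_style(template: Template, tool_style: Optional[str] = None) -> Optional[str]:
    # An explicit style wins; otherwise use what the template implies.
    return tool_style or template.inferred_tool_style

assert resolve_style(Template()) == "thoughtful_steps"  # inferred fallback
assert resolve_style(Template(), "short") == "short"    # explicit override
```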
@@ -12,7 +12,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 from examples.openai.llama_cpp_server_api import LlamaCppServerCompletionRequest
 from examples.openai.gguf_kvs import GGUFKeyValues, Keys
 from examples.openai.api import ChatCompletionResponse, Choice, Message, ChatCompletionRequest, Usage
-from examples.openai.prompting import ChatHandlerArgs, ChatTemplate, get_chat_handler, ChatHandler
+from examples.openai.prompting import ChatHandlerArgs, ChatTemplate, ToolsPromptStyle, get_chat_handler, ChatHandler

 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
@@ -32,6 +32,7 @@ def main(
     host: str = "localhost",
     port: int = 8080,
     parallel_calls: Optional[bool] = True,
+    style: Optional[ToolsPromptStyle] = None,
     auth: Optional[str] = None,
     verbose: bool = False,
     context_length: Optional[int] = None,
@@ -92,7 +93,8 @@ def main(

         chat_handler = get_chat_handler(
             ChatHandlerArgs(chat_template=chat_template, response_schema=response_schema, tools=chat_request.tools),
-            parallel_calls=parallel_calls
+            parallel_calls=parallel_calls,
+            tool_style=style,
         )

         messages = chat_request.messages
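Both entry points now accept `--style`, and because `ToolsPromptStyle` is a str-valued enum, typer can expose its values as the flag's choices (e.g. `--style short` or `--style thoughtful_steps`). A self-contained sketch of that pattern, with a trimmed three-member enum standing in for the real one:

```python
from enum import Enum
from typing import Optional

import typer

class ToolsPromptStyle(str, Enum):  # trimmed copy of the real enum's shape
    TOOLS_SHORT = "short"
    TOOLS_LONG = "long"
    TOOLS_CONSTRAINED = "thoughtful_steps"

def main(style: Optional[ToolsPromptStyle] = None):
    # typer parses e.g. `--style thoughtful_steps` into an enum member;
    # None means "infer the style from the model's chat template".
    print(f"style={style.value if style else '(inferred from template)'}")

if __name__ == "__main__":
    typer.run(main)
```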