diff --git a/examples/agent/README.md b/examples/agent/README.md
index 045d23719..9ca8a99fd 100644
--- a/examples/agent/README.md
+++ b/examples/agent/README.md
@@ -8,7 +8,6 @@ python -m examples.agent \
     --tools examples/agent/tools/example_math_tools.py \
     --goal "What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?"
 ```
-
 <details>
 <summary>Show output</summary>

@@ -37,6 +36,23 @@ python -m examples.agent \
 <details>
 <summary>Show output</summary>

 ```bash
+💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
+⚙️ get_current_weather(location=San Francisco, format=fahrenheit) -> ...
+💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
+⚙️ get_n_day_weather_forecast(location=San Francisco, format=fahrenheit, num_days=4) -> ...
+💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
+⚙️ get_n_day_weather_forecast(location=Glasgow, format=celsius, num_days=4) -> ...
+The current weather in San Francisco is sunny and 87.8F. Here is the 4-day weather forecast:
+
+For San Francisco:
+- In 1 day: Cloudy, 60.8F
+- In 2 days: Sunny, 73.4F
+- In 3 days: Cloudy, 62.6F
+
+For Glasgow:
+- In 1 day: Cloudy, 16C
+- In 2 days: Sunny, 23C
+- In 3 days: Cloudy, 17C
 ```
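The weather example above relies on tools being ordinary Python functions: the agent loads the module passed via `--tools`, gathers its functions with `collect_functions`, and derives each parameter schema from the signature (see `_get_params_schema` in the diff below). A minimal sketch of what such a tools module might look like; the bodies, canned return values, and the `Literal` unit type are illustrative assumptions, not necessarily how the repo's actual weather tools are written:

```python
# Hypothetical tools module, loadable via --tools (illustrative only).
from typing import Literal

def get_current_weather(location: str, format: Literal["celsius", "fahrenheit"]) -> str:
    """Get the current weather in a given location."""
    # A real implementation would query a weather API; this stub returns
    # a canned answer so the agent loop has something to summarize.
    return f"Sunny, {'87.8F' if format == 'fahrenheit' else '31C'} in {location}"

def get_n_day_weather_forecast(location: str, format: Literal["celsius", "fahrenheit"], num_days: int) -> str:
    """Get an N-day weather forecast for a given location."""
    unit = "F" if format == "fahrenheit" else "C"
    return "\n".join(f"In {i} day(s): Cloudy, 60{unit}" for i in range(1, num_days + 1))
```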
diff --git a/examples/agent/agent.py b/examples/agent/agent.py
index ed428af68..96355d225 100644
--- a/examples/agent/agent.py
+++ b/examples/agent/agent.py
@@ -12,6 +12,7 @@ from examples.json_schema_to_grammar import SchemaConverter
 from examples.agent.tools.std_tools import StandardTools
 from examples.openai.api import ChatCompletionRequest, ChatCompletionResponse, Message, Tool, ToolFunction
 from examples.agent.utils import collect_functions, load_module
+from examples.openai.prompting import ToolsPromptStyle

 def _get_params_schema(fn: Callable, verbose):
     converter = SchemaConverter(prop_order={}, allow_fetch=False, dotall=False, raw_pattern=False)
@@ -130,6 +131,7 @@ def main(
     auth: Optional[str] = None,
     parallel_calls: Optional[bool] = True,
     verbose: bool = False,
+    style: Optional[ToolsPromptStyle] = None,

     model: Annotated[Optional[Path], typer.Option("--model", "-m")] = "models/7B/ggml-model-f16.gguf",
     endpoint: Optional[str] = None,
@@ -175,8 +177,8 @@ def main(
             "--model", model,
             *(['--verbose'] if verbose else []),
             *(['--parallel-calls'] if parallel_calls else []),
-            *(['--context-length={context_length}'] if context_length else []),
-            *([])
+            *([f'--context-length={context_length}'] if context_length else []),
+            *([f'--style={style.value}'] if style else []),
         ]
         server_process = subprocess.Popen(cmd, stdout=sys.stderr)
         atexit.register(server_process.kill)
@@ -196,7 +198,7 @@ def main(
     if std_tools:
         tool_functions.extend(collect_functions(StandardTools))

-    response_model = None#str
+    response_model = str
     if format:
         if format in types:
             response_model = types[format]
@@ -245,6 +247,7 @@ def main(
         }]
     )
     print(result if response_model else f'➡️ {result}')
+    # exit(0)

 if __name__ == '__main__':
     typer.run(main)
diff --git a/examples/openai/prompting.py b/examples/openai/prompting.py
index 173810139..0c3f2fcb2 100644
--- a/examples/openai/prompting.py
+++ b/examples/openai/prompting.py
@@ -14,45 +14,45 @@ from examples.openai.api import Tool, Message, FunctionCall, ToolCall
 from examples.openai.gguf_kvs import GGUFKeyValues, Keys
 from examples.openai.ts_converter import SchemaToTypeScriptConverter

+_THOUGHT_KEY = "thought"
+# _THOUGHT_KEY = "thought_about_next_step_only"
+
 # While the API will be usable with a generic tools usage like OpenAI,
 # (see https://cookbook.openai.com/examples/how_to_call_functions_with_chat_models),
 # each model may need specific prompting (and/or constrained output,
 # especially for models not fine-tuned for tool usage / function calling).
-class ToolsPromptStyle(Enum):
+class ToolsPromptStyle(str, Enum):

     # Short prompt w/ schemas, ... output
-    TOOLS_SHORT = 1
+    TOOLS_SHORT = "short"

     # Longer prompt w/ schemas, ... output
-    TOOLS_LONG = 2
+    TOOLS_LONG = "long"

     # Bespoke constrained output format that favours thought and reasoning
     # while allowing unambiguous parsing of parallel tool calling.
-    TOOLS_BESPOKE = 3
+    TOOLS_CONSTRAINED = "thoughtful_steps"

     # Large prompt for https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B
     # ... output
     # Requires:
     # - git clone https://github.com/NousResearch/Hermes-Function-Calling examples/openai/hermes_function_calling
     # - Set large context length as their prompts are super long
-    TOOLS_HERMES_2_PRO = 4
+    TOOLS_HERMES_2_PRO = "tools_hermes_2_pro"

     # Seems to want to escape underscores in tool names and in the ... tags
-    TOOLS_MISTRAL = 5
+    TOOLS_MIXTRAL = "mixtral"

     # Short prompt w/ TypeScript definitions for https://github.com/MeetKai/functionary
     # https://github.com/MeetKai/functionary/blob/main/functionary/prompt_template/prompt_template_v2.py
     # Note: see this prior attempt to support Functionary: https://github.com/ggerganov/llama.cpp/pull/5695
-    TYPESCRIPT_FUNCTIONARY_V2 = 6
+    TYPESCRIPT_FUNCTIONARY_V2 = "functionary_v2"

 def raise_exception(msg: str):
     raise Exception(msg)

 class ChatTemplate(BaseModel):
     template: str
-
-    @property
-    def tool_style(self) -> 'ToolsPromptStyle':
-        return self._tool_style
+    inferred_tool_style: Optional['ToolsPromptStyle'] = None

     def __init__(self, template: str, eos_token: str, bos_token: str):
         super().__init__(template=template
@@ -65,12 +65,12 @@ class ChatTemplate(BaseModel):
         self._strict_user_assistant_alternation = "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception" in template

         if "<|recipient|>' + tool_call['function']['name']" in template:
-            self._tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
+            self.inferred_tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
         else:
-            self._tool_style = ToolsPromptStyle.TOOLS_BESPOKE
-            # self._tool_style = ToolsPromptStyle.TOOLS_LONG
-            # self._tool_style = ToolsPromptStyle.TOOLS_HERMES_2_PRO
-            # self._tool_style = ToolsPromptStyle.TOOLS_MISTRAL
+            self.inferred_tool_style = ToolsPromptStyle.TOOLS_CONSTRAINED
+            # self.inferred_tool_style = ToolsPromptStyle.TOOLS_LONG
+            # self.inferred_tool_style = ToolsPromptStyle.TOOLS_HERMES_2_PRO
+            # self.inferred_tool_style = ToolsPromptStyle.TOOLS_MIXTRAL

         # TODO: Test whether the template supports formatting tool_calls

@@ -399,7 +399,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema, parallel_calls):
         "type": "object",
         "properties": {
             # "original_goal": {"title": "Original Goal", "type": "string"},
-            "thought_about_next_step_only": {
+            _THOUGHT_KEY: {
                 "title": "Thought about next step",
                 # "title": "Thought about how the next step brings us closer to achieving the original goal",
                 "type": "string"
@@ -430,7 +430,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema, parallel_calls):
             ]
         },
     },
-        "required": ["original_goal", "thought_about_next_step_only", "next_step"]
+        "required": [_THOUGHT_KEY, "next_step"]
         # "required": ["next_step"]
     }

@@ -505,7 +505,7 @@ class BespokeToolsChatHandler(ChatHandler):
             elif 'tool_calls' in next_step:
                 return Message(
                     role="assistant",
-                    content=data["thought_about_next_step_only"] if "thought_about_next_step_only" in data else None,
+                    content=data.get(_THOUGHT_KEY),
                     tool_calls=[
                         ToolCall(id=gen_callid(), function=FunctionCall(**tc))
                         for tc in next_step['tool_calls']
@@ -539,20 +539,28 @@ _LONG_TEMPLATE='\n'.join([
     # '''This is not hypothetical, you're not asked what you would do. If you need a tool called, just call it with ....''',
 ])

-def get_chat_handler(args: ChatHandlerArgs, parallel_calls: bool) -> ChatHandler:
+def get_chat_handler(args: ChatHandlerArgs, parallel_calls: bool, tool_style: Optional[ToolsPromptStyle] = None) -> ChatHandler:
+    tool_style = tool_style or args.chat_template.inferred_tool_style
+
     if not args.tools:
         return NoToolsChatHandler(args)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:
-        return FunctionaryToolsChatHandler(args, parallel_calls=parallel_calls)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_SHORT:
-        return TemplatedToolsChatHandler(args, _SHORT_TEMPLATE, parallel_calls=parallel_calls)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_LONG:
-        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_MISTRAL:
-        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls, escapes_underscores=True)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_BESPOKE:
+
+    elif tool_style == ToolsPromptStyle.TOOLS_CONSTRAINED:
         return BespokeToolsChatHandler(args, parallel_calls=parallel_calls)
-    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO:
+
+    elif tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:
+        return FunctionaryToolsChatHandler(args, parallel_calls=parallel_calls)
+
+    elif tool_style == ToolsPromptStyle.TOOLS_SHORT:
+        return TemplatedToolsChatHandler(args, _SHORT_TEMPLATE, parallel_calls=parallel_calls)
+
+    elif tool_style == ToolsPromptStyle.TOOLS_LONG:
+        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls)
+
+    elif tool_style == ToolsPromptStyle.TOOLS_MIXTRAL:
+        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls, escapes_underscores=True)
+
+    elif tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO:
         return Hermes2ProToolsChatHandler(args)
     else:
-        raise ValueError(f"Unsupported tool call style: {args.chat_template.tool_style}")
+        raise ValueError(f"Unsupported tool call style: {tool_style}")
diff --git a/examples/openai/server.py b/examples/openai/server.py
index ccf8fddce..a8abe8c8a 100644
--- a/examples/openai/server.py
+++ b/examples/openai/server.py
@@ -12,7 +12,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
 from examples.openai.llama_cpp_server_api import LlamaCppServerCompletionRequest
 from examples.openai.gguf_kvs import GGUFKeyValues, Keys
 from examples.openai.api import ChatCompletionResponse, Choice, Message, ChatCompletionRequest, Usage
-from examples.openai.prompting import ChatHandlerArgs, ChatTemplate, get_chat_handler, ChatHandler
+from examples.openai.prompting import ChatHandlerArgs, ChatTemplate, ToolsPromptStyle, get_chat_handler, ChatHandler

 from fastapi import FastAPI, Request
 from fastapi.responses import JSONResponse
@@ -32,6 +32,7 @@ def main(
     host: str = "localhost",
     port: int = 8080,
     parallel_calls: Optional[bool] = True,
+    style: Optional[ToolsPromptStyle] = None,
     auth: Optional[str] = None,
     verbose: bool = False,
     context_length: Optional[int] = None,
@@ -92,7 +93,8 @@ def main(
         chat_handler = get_chat_handler(
             ChatHandlerArgs(chat_template=chat_template, response_schema=response_schema, tools=chat_request.tools),
-            parallel_calls=parallel_calls
+            parallel_calls=parallel_calls,
+            tool_style=style,
         )

         messages = chat_request.messages
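
The switch from integer to string enum values is what makes the new `--style` flag work end to end: typer can expose the string values as CLI choices, and `agent.py` forwards `style.value` to the server subprocess. A standalone sketch of the behaviour this relies on (subset of members only):

```python
from enum import Enum

class ToolsPromptStyle(str, Enum):
    TOOLS_SHORT = "short"
    TOOLS_LONG = "long"
    TOOLS_CONSTRAINED = "thoughtful_steps"

# Lookup by value round-trips the CLI string to the enum member...
assert ToolsPromptStyle("thoughtful_steps") is ToolsPromptStyle.TOOLS_CONSTRAINED
# ...the str mixin allows direct comparison with plain strings...
assert ToolsPromptStyle.TOOLS_LONG == "long"
# ...and .value is what agent.py interpolates into the server command line.
assert f"--style={ToolsPromptStyle.TOOLS_CONSTRAINED.value}" == "--style=thoughtful_steps"
```

It also keeps the override precedence in `get_chat_handler` readable: an explicit `--style` wins, otherwise the handler falls back to the style inferred from the chat template (`inferred_tool_style`).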
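
For the `_THOUGHT_KEY` rename, a hypothetical example of the constrained "thoughtful_steps" payload that `BespokeToolsChatHandler` now parses. The field layout follows `_make_bespoke_schema`; the concrete values and the exact `FunctionCall` fields (`name`, `arguments`) are assumptions for illustration:

```python
# Parsed JSON from a constrained completion (hypothetical values).
data = {
    "thought": "Get San Francisco's current weather first, then both forecasts.",
    "next_step": {
        "tool_calls": [
            {
                "name": "get_current_weather",  # assumed FunctionCall field
                "arguments": {"location": "San Francisco", "format": "fahrenheit"},
            }
        ]
    },
}

# The handler surfaces the thought as assistant content and emits one
# ToolCall per entry in next_step["tool_calls"]:
content = data.get("thought")  # i.e. data.get(_THOUGHT_KEY)
tool_calls = data["next_step"]["tool_calls"]
```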