diff --git a/examples/agent/README.md b/examples/agent/README.md
index 045d23719..9ca8a99fd 100644
--- a/examples/agent/README.md
+++ b/examples/agent/README.md
@@ -8,7 +8,6 @@ python -m examples.agent \
--tools examples/agent/tools/example_math_tools.py \
--goal "What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?"
```
-
Show output
@@ -37,6 +36,23 @@ python -m examples.agent \
Show output
```bash
+💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
+⚙️ get_current_weather(location=San Francisco, format=fahrenheit) -> ...
+💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
+⚙️ get_n_day_weather_forecast(location=San Francisco, format=fahrenheit, num_days=4) -> ...
+💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
+⚙️ get_n_day_weather_forecast(location=Glasgow, format=celsius, num_days=4) -> ...
+The current weather in San Francisco is sunny and 87.8F. Here is the 4-day weather forecast:
+
+For San Francisco:
+- In 1 day: Cloudy, 60.8F
+- In 2 days: Sunny, 73.4F
+- In 3 days: Cloudy, 62.6F
+
+For Glasgow:
+- In 1 day: Cloudy, 16C
+- In 2 days: Sunny, 23C
+- In 3 days: Cloudy, 17C
```
diff --git a/examples/agent/agent.py b/examples/agent/agent.py
index ed428af68..96355d225 100644
--- a/examples/agent/agent.py
+++ b/examples/agent/agent.py
@@ -12,6 +12,7 @@ from examples.json_schema_to_grammar import SchemaConverter
from examples.agent.tools.std_tools import StandardTools
from examples.openai.api import ChatCompletionRequest, ChatCompletionResponse, Message, Tool, ToolFunction
from examples.agent.utils import collect_functions, load_module
+from examples.openai.prompting import ToolsPromptStyle
def _get_params_schema(fn: Callable, verbose):
converter = SchemaConverter(prop_order={}, allow_fetch=False, dotall=False, raw_pattern=False)
@@ -130,6 +131,7 @@ def main(
auth: Optional[str] = None,
parallel_calls: Optional[bool] = True,
verbose: bool = False,
+ style: Optional[ToolsPromptStyle] = None,
model: Annotated[Optional[Path], typer.Option("--model", "-m")] = "models/7B/ggml-model-f16.gguf",
endpoint: Optional[str] = None,
@@ -175,8 +177,8 @@ def main(
"--model", model,
*(['--verbose'] if verbose else []),
*(['--parallel-calls'] if parallel_calls else []),
- *(['--context-length={context_length}'] if context_length else []),
- *([])
+ *([f'--context-length={context_length}'] if context_length else []),
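+        # Forward the requested prompting style to the spawned server; the flag
+        # values are the ToolsPromptStyle strings, e.g. --style=thoughtful_steps.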
+ *([f'--style={style.value}'] if style else []),
]
server_process = subprocess.Popen(cmd, stdout=sys.stderr)
atexit.register(server_process.kill)
@@ -196,7 +198,7 @@ def main(
if std_tools:
tool_functions.extend(collect_functions(StandardTools))
- response_model = None#str
+ response_model = str
if format:
if format in types:
response_model = types[format]
diff --git a/examples/openai/prompting.py b/examples/openai/prompting.py
index 173810139..0c3f2fcb2 100644
--- a/examples/openai/prompting.py
+++ b/examples/openai/prompting.py
@@ -14,45 +14,45 @@ from examples.openai.api import Tool, Message, FunctionCall, ToolCall
from examples.openai.gguf_kvs import GGUFKeyValues, Keys
from examples.openai.ts_converter import SchemaToTypeScriptConverter
+_THOUGHT_KEY = "thought"
+# _THOUGHT_KEY = "thought_about_next_step_only"
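+# Key under which the model's free-form reasoning appears in the constrained
+# "thoughtful_steps" output schema below, and under which it is read back
+# when parsing the model's response.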
+
# While the API will be usable with a generic tools usage like OpenAI,
# (see https://cookbook.openai.com/examples/how_to_call_functions_with_chat_models),
# each model may need specific prompting (and/or constrained output,
# especially for models not fine-tuned for tool usage / function calling).
-class ToolsPromptStyle(Enum):
+class ToolsPromptStyle(str, Enum):
# Short prompt w/ schemas, ... output
- TOOLS_SHORT = 1
+ TOOLS_SHORT = "short"
# Longer prompt w/ schemas, ... output
- TOOLS_LONG = 2
+ TOOLS_LONG = "long"
# Bespoke constrained output format that favours thought and reasoning
# while allowing unambiguous parsing of parallel tool calling.
- TOOLS_BESPOKE = 3
+ TOOLS_CONSTRAINED = "thoughtful_steps"
# Large prompt for https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B
# ... output
# Requires:
# - git clone https://github.com/NousResearch/Hermes-Function-Calling examples/openai/hermes_function_calling
# - Set large context length as their prompts are super long
- TOOLS_HERMES_2_PRO = 4
+ TOOLS_HERMES_2_PRO = "tools_hermes_2_pro"
# Seems to want to escape underscores in tool names and in the ... tags
- TOOLS_MISTRAL = 5
+ TOOLS_MIXTRAL = "mixtral"
# Short prompt w/ TypeScript definitions for https://github.com/MeetKai/functionary
# https://github.com/MeetKai/functionary/blob/main/functionary/prompt_template/prompt_template_v2.py
# Note: see this prior attempt to support Functionary: https://github.com/ggerganov/llama.cpp/pull/5695
- TYPESCRIPT_FUNCTIONARY_V2 = 6
+ TYPESCRIPT_FUNCTIONARY_V2 = "functionary_v2"
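+
+# The string values double as CLI choices (the --style flag of the agent and
+# server entry points). A minimal sketch of the round-trip, relying only on
+# standard Enum lookup-by-value:
+#
+#     assert ToolsPromptStyle("thoughtful_steps") is ToolsPromptStyle.TOOLS_CONSTRAINED
+#     assert ToolsPromptStyle.TOOLS_CONSTRAINED.value == "thoughtful_steps"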
def raise_exception(msg: str):
raise Exception(msg)
class ChatTemplate(BaseModel):
template: str
-
- @property
- def tool_style(self) -> 'ToolsPromptStyle':
- return self._tool_style
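+    # Style inferred from the chat template at construction time; used by
+    # get_chat_handler() as the default when no explicit style is given.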
+ inferred_tool_style: Optional['ToolsPromptStyle'] = None
def __init__(self, template: str, eos_token: str, bos_token: str):
super().__init__(template=template
@@ -65,12 +65,12 @@ class ChatTemplate(BaseModel):
self._strict_user_assistant_alternation = "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception" in template
if "<|recipient|>' + tool_call['function']['name']" in template:
- self._tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
+ self.inferred_tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
else:
- self._tool_style = ToolsPromptStyle.TOOLS_BESPOKE
- # self._tool_style = ToolsPromptStyle.TOOLS_LONG
- # self._tool_style = ToolsPromptStyle.TOOLS_HERMES_2_PRO
- # self._tool_style = ToolsPromptStyle.TOOLS_MISTRAL
+ self.inferred_tool_style = ToolsPromptStyle.TOOLS_CONSTRAINED
+ # self.inferred_tool_style = ToolsPromptStyle.TOOLS_LONG
+ # self.inferred_tool_style = ToolsPromptStyle.TOOLS_HERMES_2_PRO
+ # self.inferred_tool_style = ToolsPromptStyle.TOOLS_MIXTRAL
# TODO: Test whether the template supports formatting tool_calls
@@ -399,7 +399,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema, parallel_calls):
"type": "object",
"properties": {
# "original_goal": {"title": "Original Goal", "type": "string"},
- "thought_about_next_step_only": {
+ _THOUGHT_KEY: {
"title": "Thought about next step",
# "title": "Thought about how the next step brings us closer to achieving the original goal",
"type": "string"
@@ -430,7 +430,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema, parallel_calls):
]
},
},
- "required": ["original_goal", "thought_about_next_step_only", "next_step"]
+ "required": ["original_goal", _THOUGHT_KEY, "next_step"]
# "required": ["next_step"]
}
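+# Illustrative shape of a conforming assistant turn (the tool-call field
+# names follow tool_call_schema and are hypothetical here):
+#
+#     {"thought": "Get the current SF weather first.",
+#      "next_step": {"tool_calls": [{"name": "get_current_weather",
+#                                    "arguments": {"location": "San Francisco"}}]}}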
@@ -505,7 +505,7 @@ class BespokeToolsChatHandler(ChatHandler):
elif 'tool_calls' in next_step:
return Message(
role="assistant",
- content=data["thought_about_next_step_only"] if "thought_about_next_step_only" in data else None,
+ content=data.get(_THOUGHT_KEY),
tool_calls=[
ToolCall(id=gen_callid(), function=FunctionCall(**tc))
for tc in next_step['tool_calls']
@@ -539,20 +539,28 @@ _LONG_TEMPLATE='\n'.join([
# 'This is not hypothetical, you're not asked what you would do. If you need a tool called, just call it with ....''',
])
-def get_chat_handler(args: ChatHandlerArgs, parallel_calls: bool) -> ChatHandler:
+def get_chat_handler(args: ChatHandlerArgs, parallel_calls: bool, tool_style: Optional[ToolsPromptStyle] = None) -> ChatHandler:
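+    """Pick the ChatHandler for the requested style, preferring an explicit
+    tool_style (e.g. from the --style flag) over the template-inferred one."""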
+ tool_style = tool_style or args.chat_template.inferred_tool_style
+
if not args.tools:
return NoToolsChatHandler(args)
- elif args.chat_template.tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:
- return FunctionaryToolsChatHandler(args, parallel_calls=parallel_calls)
- elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_SHORT:
- return TemplatedToolsChatHandler(args, _SHORT_TEMPLATE, parallel_calls=parallel_calls)
- elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_LONG:
- return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls)
- elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_MISTRAL:
- return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls, escapes_underscores=True)
- elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_BESPOKE:
+
+ elif tool_style == ToolsPromptStyle.TOOLS_CONSTRAINED:
return BespokeToolsChatHandler(args, parallel_calls=parallel_calls)
- elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO:
+
+ elif tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:
+ return FunctionaryToolsChatHandler(args, parallel_calls=parallel_calls)
+
+ elif tool_style == ToolsPromptStyle.TOOLS_SHORT:
+ return TemplatedToolsChatHandler(args, _SHORT_TEMPLATE, parallel_calls=parallel_calls)
+
+ elif tool_style == ToolsPromptStyle.TOOLS_LONG:
+ return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls)
+
+ elif tool_style == ToolsPromptStyle.TOOLS_MIXTRAL:
+ return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls, escapes_underscores=True)
+
+ elif tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO:
return Hermes2ProToolsChatHandler(args)
else:
-        raise ValueError(f"Unsupported tool call style: {args.chat_template.tool_style}")
+        raise ValueError(f"Unsupported tool call style: {tool_style}")
diff --git a/examples/openai/server.py b/examples/openai/server.py
index ccf8fddce..a8abe8c8a 100644
--- a/examples/openai/server.py
+++ b/examples/openai/server.py
@@ -12,7 +12,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from examples.openai.llama_cpp_server_api import LlamaCppServerCompletionRequest
from examples.openai.gguf_kvs import GGUFKeyValues, Keys
from examples.openai.api import ChatCompletionResponse, Choice, Message, ChatCompletionRequest, Usage
-from examples.openai.prompting import ChatHandlerArgs, ChatTemplate, get_chat_handler, ChatHandler
+from examples.openai.prompting import ChatHandlerArgs, ChatTemplate, ToolsPromptStyle, get_chat_handler, ChatHandler
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
@@ -32,6 +32,7 @@ def main(
host: str = "localhost",
port: int = 8080,
parallel_calls: Optional[bool] = True,
+ style: Optional[ToolsPromptStyle] = None,
auth: Optional[str] = None,
verbose: bool = False,
context_length: Optional[int] = None,
@@ -92,7 +93,8 @@ def main(
chat_handler = get_chat_handler(
ChatHandlerArgs(chat_template=chat_template, response_schema=response_schema, tools=chat_request.tools),
- parallel_calls=parallel_calls
+ parallel_calls=parallel_calls,
+ tool_style=style,
)
messages = chat_request.messages