agent: --style

ochafik 2024-03-29 18:09:31 +00:00
parent 9ab493f67e
commit e0c8af4ba0
4 changed files with 65 additions and 36 deletions


@@ -8,7 +8,6 @@ python -m examples.agent \
    --tools examples/agent/tools/example_math_tools.py \
    --goal "What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?"
```
<!-- --format float \ -->
<details>
<summary>Show output</summary>
@@ -37,6 +36,23 @@ python -m examples.agent \
<summary>Show output</summary>
```bash
💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
⚙️ get_current_weather(location=San Francisco, format=fahrenheit) -> ...
💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
⚙️ get_n_day_weather_forecast(location=San Francisco, format=fahrenheit, num_days=4) -> ...
💭 I will first get the current weather in San Francisco, then get the 4-day weather forecast for both San Francisco and Glasgow.
⚙️ get_n_day_weather_forecast(location=Glasgow, format=celsius, num_days=4) -> ...
The current weather in San Francisco is sunny and 87.8F. Here is the 4-day weather forecast:
For San Francisco:
- In 1 day: Cloudy, 60.8F
- In 2 days: Sunny, 73.4F
- In 3 days: Cloudy, 62.6F
For Glasgow:
- In 1 day: Cloudy, 16C
- In 2 days: Sunny, 23C
- In 3 days: Cloudy, 17C
```
</details>
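
For reference, the arithmetic in the math-tools goal at the top of this README reduces to two expressions; a quick Python sanity check of the expected answer (not part of the diff):

```python
# "Sum of 2535 squared and 32222000403, multiplied by one and a half":
total = (2535**2 + 32222000403) * 1.5
print(total)      # 48342639942.0

# "What's a third of the result?"
print(total / 3)  # 16114213314.0
```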


@@ -12,6 +12,7 @@ from examples.json_schema_to_grammar import SchemaConverter
from examples.agent.tools.std_tools import StandardTools
from examples.openai.api import ChatCompletionRequest, ChatCompletionResponse, Message, Tool, ToolFunction
from examples.agent.utils import collect_functions, load_module
from examples.openai.prompting import ToolsPromptStyle

def _get_params_schema(fn: Callable, verbose):
    converter = SchemaConverter(prop_order={}, allow_fetch=False, dotall=False, raw_pattern=False)
@@ -130,6 +131,7 @@ def main(
    auth: Optional[str] = None,
    parallel_calls: Optional[bool] = True,
    verbose: bool = False,
    style: Optional[ToolsPromptStyle] = None,

    model: Annotated[Optional[Path], typer.Option("--model", "-m")] = "models/7B/ggml-model-f16.gguf",
    endpoint: Optional[str] = None,
@@ -175,8 +177,8 @@ def main(
"--model", model,
*(['--verbose'] if verbose else []),
*(['--parallel-calls'] if parallel_calls else []),
*(['--context-length={context_length}'] if context_length else []),
*([])
*([f'--context-length={context_length}'] if context_length else []),
*([f'--style={style.value}'] if style else []),
]
server_process = subprocess.Popen(cmd, stdout=sys.stderr)
atexit.register(server_process.kill)
@@ -196,7 +198,7 @@ def main(
    if std_tools:
        tool_functions.extend(collect_functions(StandardTools))

    response_model = None#str
    response_model = str
    if format:
        if format in types:
            response_model = types[format]
@@ -245,6 +247,7 @@ def main(
        }]
    )
    print(result if response_model else f'➡️ {result}')
    # exit(0)

if __name__ == '__main__':
    typer.run(main)

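The hunks above assemble the spawned server command by splicing optional flags in with conditional list unpacking; a minimal standalone sketch of that pattern (the module path and flag names here are illustrative, not taken from the diff):

```python
from typing import Optional

def build_server_cmd(model: str, verbose: bool, style: Optional[str]) -> list[str]:
    # Each optional flag expands to a one-element list or to nothing,
    # keeping the command a flat list of strings for subprocess.Popen.
    return [
        "python", "-m", "examples.openai",  # module path illustrative
        "--model", model,
        *(["--verbose"] if verbose else []),
        *([f"--style={style}"] if style else []),
    ]

print(build_server_cmd("models/7B/ggml-model-f16.gguf", True, "short"))
# ['python', '-m', 'examples.openai', '--model', 'models/7B/ggml-model-f16.gguf', '--verbose', '--style=short']
```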

@@ -14,45 +14,45 @@ from examples.openai.api import Tool, Message, FunctionCall, ToolCall
from examples.openai.gguf_kvs import GGUFKeyValues, Keys
from examples.openai.ts_converter import SchemaToTypeScriptConverter
_THOUGHT_KEY = "thought"
# _THOUGHT_KEY = "thought_about_next_step_only"

# While the API will be usable with a generic tools usage like OpenAI,
# (see https://cookbook.openai.com/examples/how_to_call_functions_with_chat_models),
# each model may need specific prompting (and/or constrained output,
# especially for models not fine-tuned for tool usage / function calling).
class ToolsPromptStyle(Enum):
class ToolsPromptStyle(str, Enum):
    # Short prompt w/ <tools>schemas</tools>, <tool_call>...</tool_call> output
    TOOLS_SHORT = 1
    TOOLS_SHORT = "short"

    # Longer prompt w/ <tools>schemas</tools>, <tool_call>...</tool_call> output
    TOOLS_LONG = 2
    TOOLS_LONG = "long"

    # Bespoke constrained output format that favours thought and reasoning
    # while allowing unambiguous parsing of parallel tool calling.
    TOOLS_BESPOKE = 3
    TOOLS_CONSTRAINED = "thoughtful_steps"

    # Large prompt for https://huggingface.co/NousResearch/Hermes-2-Pro-Mistral-7B
    # <tool_call>...</tool_call> output
    # Requires:
    # - git clone https://github.com/NousResearch/Hermes-Function-Calling examples/openai/hermes_function_calling
    # - Set large context length as their prompts are super long
    TOOLS_HERMES_2_PRO = 4
    TOOLS_HERMES_2_PRO = "tools_hermes_2_pro"

    # Seems to want to escape underscores in tool names and in the <tool\_call>...</tool\_call> tags
    TOOLS_MISTRAL = 5
    TOOLS_MIXTRAL = "mixtral"

    # Short prompt w/ TypeScript definitions for https://github.com/MeetKai/functionary
    # https://github.com/MeetKai/functionary/blob/main/functionary/prompt_template/prompt_template_v2.py
    # Note: see this prior attempt to support Functionary: https://github.com/ggerganov/llama.cpp/pull/5695
    TYPESCRIPT_FUNCTIONARY_V2 = 6
    TYPESCRIPT_FUNCTIONARY_V2 = "functionary_v2"
def raise_exception(msg: str):
    raise Exception(msg)

class ChatTemplate(BaseModel):
    template: str

    @property
    def tool_style(self) -> 'ToolsPromptStyle':
        return self._tool_style
    inferred_tool_style: Optional['ToolsPromptStyle'] = None

    def __init__(self, template: str, eos_token: str, bos_token: str):
        super().__init__(template=template
@@ -65,12 +65,12 @@ class ChatTemplate(BaseModel):
        self._strict_user_assistant_alternation = "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception" in template

        if "<|recipient|>' + tool_call['function']['name']" in template:
            self._tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
            self.inferred_tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
        else:
            self._tool_style = ToolsPromptStyle.TOOLS_BESPOKE
            # self._tool_style = ToolsPromptStyle.TOOLS_LONG
            # self._tool_style = ToolsPromptStyle.TOOLS_HERMES_2_PRO
            # self._tool_style = ToolsPromptStyle.TOOLS_MISTRAL
            self.inferred_tool_style = ToolsPromptStyle.TOOLS_CONSTRAINED
            # self.inferred_tool_style = ToolsPromptStyle.TOOLS_LONG
            # self.inferred_tool_style = ToolsPromptStyle.TOOLS_HERMES_2_PRO
            # self.inferred_tool_style = ToolsPromptStyle.TOOLS_MIXTRAL

        # TODO: Test whether the template supports formatting tool_calls
@@ -399,7 +399,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema, parallel_calls):
"type": "object",
"properties": {
# "original_goal": {"title": "Original Goal", "type": "string"},
"thought_about_next_step_only": {
_THOUGHT_KEY: {
"title": "Thought about next step",
# "title": "Thought about how the next step brings us closer to achieving the original goal",
"type": "string"
@@ -430,7 +430,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema, parallel_calls):
            ]
        },
    },
    "required": ["original_goal", "thought_about_next_step_only", "next_step"]
    "required": ["original_goal", _THOUGHT_KEY, "next_step"]
    # "required": ["next_step"]
}
@@ -505,7 +505,7 @@ class BespokeToolsChatHandler(ChatHandler):
        elif 'tool_calls' in next_step:
            return Message(
                role="assistant",
                content=data["thought_about_next_step_only"] if "thought_about_next_step_only" in data else None,
                content=data.get(_THOUGHT_KEY),
                tool_calls=[
                    ToolCall(id=gen_callid(), function=FunctionCall(**tc))
                    for tc in next_step['tool_calls']
@@ -539,20 +539,28 @@ _LONG_TEMPLATE='\n'.join([
    # 'This is not hypothetical, you're not asked what you would do. If you need a tool called, just call it with <tool_call>...</tool_call>.''',
])

def get_chat_handler(args: ChatHandlerArgs, parallel_calls: bool) -> ChatHandler:
def get_chat_handler(args: ChatHandlerArgs, parallel_calls: bool, tool_style: Optional[ToolsPromptStyle] = None) -> ChatHandler:
    tool_style = tool_style or args.chat_template.inferred_tool_style

    if not args.tools:
        return NoToolsChatHandler(args)
    elif args.chat_template.tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:
        return FunctionaryToolsChatHandler(args, parallel_calls=parallel_calls)
    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_SHORT:
        return TemplatedToolsChatHandler(args, _SHORT_TEMPLATE, parallel_calls=parallel_calls)
    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_LONG:
        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls)
    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_MISTRAL:
        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls, escapes_underscores=True)
    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_BESPOKE:
    elif tool_style == ToolsPromptStyle.TOOLS_CONSTRAINED:
        return BespokeToolsChatHandler(args, parallel_calls=parallel_calls)
    elif args.chat_template.tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO:
    elif tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:
        return FunctionaryToolsChatHandler(args, parallel_calls=parallel_calls)
    elif tool_style == ToolsPromptStyle.TOOLS_SHORT:
        return TemplatedToolsChatHandler(args, _SHORT_TEMPLATE, parallel_calls=parallel_calls)
    elif tool_style == ToolsPromptStyle.TOOLS_LONG:
        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls)
    elif tool_style == ToolsPromptStyle.TOOLS_MIXTRAL:
        return TemplatedToolsChatHandler(args, _LONG_TEMPLATE, parallel_calls=parallel_calls, escapes_underscores=True)
    elif tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO:
        return Hermes2ProToolsChatHandler(args)
    else:
        raise ValueError(f"Unsupported tool call style: {args.chat_template.tool_style}")

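The move from integer to string enum values is what lets ToolsPromptStyle round-trip through a `--style` flag; a minimal sketch of the str-Enum behavior (two members reproduced from the diff above):

```python
from enum import Enum

class ToolsPromptStyle(str, Enum):
    TOOLS_SHORT = "short"
    TOOLS_CONSTRAINED = "thoughtful_steps"

# A str-backed Enum constructs from its plain value and compares equal to it,
# so "--style=short" can be parsed straight into a member.
assert ToolsPromptStyle("short") is ToolsPromptStyle.TOOLS_SHORT
assert ToolsPromptStyle.TOOLS_CONSTRAINED == "thoughtful_steps"
assert ToolsPromptStyle.TOOLS_SHORT.value == "short"
```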

@@ -12,7 +12,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent.parent))
from examples.openai.llama_cpp_server_api import LlamaCppServerCompletionRequest
from examples.openai.gguf_kvs import GGUFKeyValues, Keys
from examples.openai.api import ChatCompletionResponse, Choice, Message, ChatCompletionRequest, Usage
from examples.openai.prompting import ChatHandlerArgs, ChatTemplate, get_chat_handler, ChatHandler
from examples.openai.prompting import ChatHandlerArgs, ChatTemplate, ToolsPromptStyle, get_chat_handler, ChatHandler

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
@@ -32,6 +32,7 @@ def main(
    host: str = "localhost",
    port: int = 8080,
    parallel_calls: Optional[bool] = True,
    style: Optional[ToolsPromptStyle] = None,
    auth: Optional[str] = None,
    verbose: bool = False,
    context_length: Optional[int] = None,
@@ -92,7 +93,8 @@ def main(
        chat_handler = get_chat_handler(
            ChatHandlerArgs(chat_template=chat_template, response_schema=response_schema, tools=chat_request.tools),
            parallel_calls=parallel_calls
            parallel_calls=parallel_calls,
            tool_style=style,
        )

        messages = chat_request.messages
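
Both entry points declare `style: Optional[ToolsPromptStyle] = None`, relying on typer to expose an Enum-typed parameter as a choice-style option; a self-contained sketch of that mapping (the Style enum is an illustrative stand-in, not the real class):

```python
from enum import Enum
from typing import Optional
import typer

class Style(str, Enum):  # stand-in for ToolsPromptStyle
    SHORT = "short"
    LONG = "long"

def main(style: Optional[Style] = None):
    # typer renders an Enum-typed parameter as --style with the member
    # values as allowed choices; None means "no override was given".
    typer.echo(f"style={style.value if style else '(inferred from chat template)'}")

if __name__ == '__main__':
    typer.run(main)
```

With no flag given, style stays None and get_chat_handler falls back to the template's inferred_tool_style, as in the prompting.py diff above.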