server.py: crude reactor

parent 59b411406f
commit 253b68d9a7

4 changed files with 373 additions and 39 deletions
@@ -10,8 +10,12 @@ class ToolCall(BaseModel):
    type: Literal["function"] = "function"
    function: FunctionCall

ToolCallsTypeAdapter = TypeAdapter(list[ToolCall])

class Message(BaseModel):
    role: str
    name: Optional[str] = None
    tool_call_id: Optional[str] = None
    content: Optional[str]
    tool_calls: Optional[list[ToolCall]] = None

@@ -41,8 +41,10 @@ class ChatTemplate(BaseModel):
        if "<|recipient|>' + tool_call['function']['name']" in template:
            self._tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
        else:
-           self._tool_style = ToolsPromptStyle.TOOLS_BESPOKE
+           # self._tool_style = ToolsPromptStyle.TOOLS_LONG
+           # self._tool_style = ToolsPromptStyle.TOOLS_BESPOKE

+           self._tool_style = ToolsPromptStyle.TOOLS_LONG
            # self._tool_style = ToolsPromptStyle.TOOLS_MISTRAL

        # TODO: Test whether the template supports formatting tool_calls

@@ -87,6 +89,8 @@ class ChatTemplate(BaseModel):
            eos_token = tokens[metadata[Keys.Tokenizer.EOS_ID]])

    def render(self, messages: list[Message], add_generation_prompt: bool, omit_bos: bool = False):
+       sys.stderr.write(f'# strict_user_assistant_alternation={self._strict_user_assistant_alternation}\n')
+       sys.stderr.write(f'# messages=' + "\n".join(json.dumps(m.model_dump(), indent=2) for m in messages) + '\n')
        if self._strict_user_assistant_alternation and any(m.role not in ('user', 'assistant') for m in messages):
            new_messages=[]
            i = 0
@@ -106,6 +110,12 @@ class ChatTemplate(BaseModel):
                        content=f'{messages[i].content}\n{tc}'
                    ))
                    i += 1
+               elif messages[i].role == 'tool':
+                   new_messages.append(Message(
+                       role="user",
+                       content=f'TOOL(name={messages[i].name}, id={messages[i].tool_call_id}): {messages[i].content}',
+                   ))
+                   i += 1
                else:
                    new_messages.append(messages[i])
                    i += 1
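(Illustration of the added branch, with made-up values: a {"role": "tool", "name": "add", "tool_call_id": "call_0", "content": "3.0"} message is re-emitted as {"role": "user", "content": "TOOL(name=add, id=call_0): 3.0"}, so templates that only accept alternating user/assistant turns can still render tool results.)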
@@ -408,12 +418,13 @@ class FunctionaryToolsChatHandler(ChatHandler):
        content = '\n'.join(text_content).strip()
        return Message(role="assistant", content=content if content else None, tool_calls=tool_calls if tool_calls else None)

-def _make_bespoke_schema(response_schema, tool_call_schema):
+def _make_bespoke_schema(response_schema, tool_call_schema, allow_parallel_calls=False):
    return {
        "type": "object",
        "properties": {
-           # "original_goal": {"title": "Original Goal", "type": "string"},
-           "thought": {
+           "original_goal": {"title": "Original Goal", "type": "string"},
+           "thought_about_next_step_only": {
                "title": "Thought about next step",
+               # "title": "Thought about how the next step brings us closer to achieving the original goal",
                "type": "string"
            },
@@ -421,14 +432,14 @@ def _make_bespoke_schema(response_schema, tool_call_schema):
                "title": "Next Step: either a result or one or more tool calls to achieve the original goal",
                "oneOf": [
                    {
-                       "title": "Tool Calls",
+                       # "title": "Tool Calls",
                        "properties": {
                            # "type": {
                            #     "const": "tool_calls"
                            # },
                            "tool_calls": {
                                "type": "array",
-                               "items": tool_call_schema
+                               "prefixItems": tool_call_schema if allow_parallel_calls \
+                                   else [tool_call_schema],
                            }
                        },
                        "required": ["tool_calls"]
@@ -443,7 +454,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema):
                    ]
                },
            },
-           "required": ["original_goal", "thought", "next_step"]
+           "required": ["original_goal", "thought_about_next_step_only", "next_step"]
        }

class BespokeToolsChatHandler(ChatHandler):
@@ -516,7 +527,7 @@ class BespokeToolsChatHandler(ChatHandler):
        elif 'tool_calls' in next_step:
            return Message(
                role="assistant",
-               content=data["thought"],
+               content=data["thought_about_next_step_only"],
                tool_calls=[
                    ToolCall(id=gen_callid(), function=FunctionCall(**tc))
                    for tc in next_step['tool_calls']
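(Illustration, not part of the diff: with the renamed keys, a model response that validates against this bespoke schema looks roughly like {"original_goal": "…", "thought_about_next_step_only": "…", "next_step": {"tool_calls": [{"name": "add", "arguments": {"a": 1, "b": 2}}]}}, where next_step may instead carry the final result.)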
examples/openai/reactor.py (new file, 344 additions)
@@ -0,0 +1,344 @@
# Usage:
#! ./server -m some-model.gguf &
#! pip install pydantic
#! python examples/json-schema-pydantic-example.py
#
# TODO:
# - https://github.com/NousResearch/Hermes-Function-Calling
#
# <|im_start|>system
# You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags
# You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
# <tools> {'type': 'function', 'function': {'name': 'get_stock_fundamentals',
# 'description': 'get_stock_fundamentals(symbol: str) -> dict - Get fundamental data for a given stock symbol using yfinance API.\n\n Args:\n symbol (str): The stock symbol.\n\n Returns:\n dict: A dictionary containing fundamental data.', 'parameters': {'type': 'object', 'properties': {'symbol': {'type': 'string'}}, 'required': ['symbol']}}}
# </tools> Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
# <tool_call>
# {'arguments': <args-dict>, 'name': <function-name>}
# </tool_call><|im_end|>

from dataclasses import dataclass
import subprocess
import sys
from pydantic import BaseModel, TypeAdapter
from annotated_types import MinLen
from typing import Annotated, Callable, List, Union, Literal, Optional, Type, get_args, get_origin
import json, requests

from examples.openai.api import ToolCallsTypeAdapter

def type_to_str(t):
    origin = get_origin(t)
    if origin is None:
        return t.__name__
    args = get_args(t)
    return origin.__name__ + (
        f'[{", ".join(type_to_str(a) for a in args)}]' if args else ''
    )
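# e.g. type_to_str(float)            -> 'float'
#      type_to_str(list[int])        -> 'list[int]'
#      type_to_str(dict[str, float]) -> 'dict[str, float]'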
def build_union_type_adapter(*types):
    src = '\n'.join([
        'from pydantic import TypeAdapter',
        'from typing import Union',
        f'_out = TypeAdapter(Union[{", ".join(type_to_str(t) for t in types)}])',
    ])
    globs = {
        **globals(),
        **{t.__name__: t for t in types},
    }
    exec(src, globs)
    return globs['_out']
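# e.g. build_union_type_adapter(Thought, str) executes
#   _out = TypeAdapter(Union[Thought, str])
# in a namespace seeded with the given types (Thought is defined just below).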
class Thought(BaseModel):
    thought: str


def build_tool_call_adapter2(final_output_type, *tools):
    lines = [
        'from pydantic import BaseModel, TypeAdapter',
        'from typing import Literal, Union',
    ]
    globs = {
        **globals(),
        **locals(),
        final_output_type.__name__: final_output_type,
    }
    tool_calls = []
    for fn in tools:
        # TODO: escape fn.__name__ and fn.__doc__ to avoid comment or metadata injection!
        fn_name = fn.__name__
        fn_doc = fn.__doc__.replace('"""', "'''") if fn.__doc__ else None
        name = fn_name.replace('_', ' ').title().replace(' ', '')
        lines += [
            f'class {name}ToolArgs(BaseModel):',
            *(f'    {k}: {type_to_str(v)}' for k, v in fn.__annotations__.items() if k != 'return'),
            f'class {name}ToolCall(BaseModel):',
            *([f'    """{fn_doc}"""'] if fn_doc else []),
            f'    name: Literal["{fn_name}"]',
            f'    arguments: {name}ToolArgs',
            f'class {name}Tool(BaseModel):',
            # *([f'    """{fn_doc}"""'] if fn_doc else []),
            f'    id: str',
            f'    type: Literal["function"]',
            f'    function: {name}ToolCall',
            f'    def __call__(self) -> {type_to_str(fn.__annotations__.get("return"))}:',
            f'        return {fn_name}(**self.function.arguments.dict())',
        ]
        tool_calls.append(f'{name}Tool')

    lines += [
        # 'class FinalResult(BaseModel):',
        # f'    result: {type_to_str(final_output_type)}',
        # 'class Response(BaseModel):',
        # f'    """A response that starts with a thought about whether we need tools or not, the plan about tool usage (maybe a sequence of tool calls), and then either a final result (of type {final_output_type.__name__}) or a first tool call"""',
        # f'    original_goal: str',
        # f'    thought_process: str',
        # # f'    thought: str',
        # f'    next_step: Union[FinalResult, {", ".join(tool_calls)}]',
        # f'response_adapter = TypeAdapter(Response)'
        f'response_adapter = TypeAdapter(Union[{", ".join(tool_calls)}])',
    ]

    exec('\n'.join(lines), globs)
    return globs['response_adapter']
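# For a tool like add(a: float, b: float) -> float (defined below), the adapter
# built above amounts to these models, hand-expanded here for illustration only:
#
#   class AddToolArgs(BaseModel):
#       a: float
#       b: float
#   class AddToolCall(BaseModel):
#       """Add a and b reliably. ..."""
#       name: Literal["add"]
#       arguments: AddToolArgs
#   class AddTool(BaseModel):
#       id: str
#       type: Literal["function"]
#       function: AddToolCall
#       def __call__(self) -> float:
#           return add(**self.function.arguments.dict())
#
#   response_adapter = TypeAdapter(Union[AddTool, MultiplyTool, ...])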
def create_completion2(*, response_model=None, max_tool_iterations=None, tools=[], endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
    '''
    Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
    (llama.cpp server, llama-cpp-python, Anyscale / Together...)

    The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
    '''
    if response_model:
        type_adapter = TypeAdapter(response_model)
        schema = type_adapter.json_schema()
        # messages = [{
        #     "role": "system",
        #     "content": f"Respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
        # }] + messages
        # print("Completion: ", json.dumps(messages, indent=2))
        # print("SCHEMA: " + json.dumps(schema, indent=2))
        response_format={"type": "json_object", "schema": schema }

    tool_call_adapter = build_tool_call_adapter2(response_model, *tools)
    tool_adapters = [(fn, TypeAdapter(fn)) for fn in tools]
    tools_schemas = [{
        "type": "function",
        "function": {
            "name": fn.__name__,
            "description": fn.__doc__,
            "parameters": ta.json_schema()
        }
    } for (fn, ta) in tool_adapters]

    # messages = [{
    #     "role": "system",
    #     "content": '\n'.join([
    #         # "You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.",
    #         # "You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:",
    #         # f'<tools>{json.dumps(tools_schemas)}</tools>',
    #         'Before calling each tool, you think clearly and briefly about why and how you are using the tool.',
    #         f"Respond in JSON format with the following schema: {json.dumps(schema, indent=2)}" if schema else "",
    #     ])
    # }] + messages

    i = 0
    while (max_tool_iterations is None or i < max_tool_iterations):
        body=dict(
            messages=messages,
            response_format=response_format,
            tools=tools_schemas,
            **kwargs
        )
        # sys.stderr.write(f'# REQUEST: {json.dumps(body, indent=2)}\n')
        response = requests.post(
            endpoint,
            headers={"Content-Type": "application/json"},
            json=body,
        )
        if response.status_code != 200:
            raise Exception(f"Request failed ({response.status_code}): {response.text}")

        # sys.stderr.write(f"\n# RESPONSE:\n\n<<<{response.text}>>>\n\n")
        data = response.json()
        if 'error' in data:
            raise Exception(data['error']['message'])

        # sys.stderr.write(f"\n# RESPONSE DATA:\n\n{json.dumps(data, indent=2)}\n\n")
        # print(json.dumps(data, indent=2))
        choice = data["choices"][0]

        content = choice["message"].get("content")
        if choice.get("finish_reason") == "tool_calls":
            # sys.stderr.write(f'\n# TOOL CALLS:\n{json.dumps(choice["message"]["tool_calls"], indent=2)}\n\n')
            # tool_calls = ToolCallsTypeAdapter.validate_json(json.dumps(choice["tool_calls"]))
            messages.append(choice["message"])
            for tool_call in choice["message"]["tool_calls"]:
                # id = tool_call.get("id")
                # if id:
                #     del tool_call["id"]

                if content:
                    print(f'💭 {content}')

                tc = tool_call_adapter.validate_json(json.dumps(tool_call))

                pretty_call = f'{tc.function.name}({", ".join(f"{k}={v}" for k, v in tc.function.arguments.model_dump().items())})'
                sys.stdout.write(f'⚙️ {pretty_call}')
                result = tc()
                sys.stdout.write(f" -> {result}\n")
                messages.append({
                    "tool_call_id": tc.id,
                    "role": "tool",
                    "name": tc.function.name,
                    # "content": f'{result}',
                    "content": f'{pretty_call} = {result}',
                })
        else:
            assert content
            # print(content)
            # print(json.dumps(json.loads(content), indent=2))
            result = type_adapter.validate_json(content) if type_adapter else content
            # if isinstance(result, Thought):
            #     print(f'💭 {result.thought}')
            #     messages.append({
            #         "role": "assistant",
            #         "content": json.dumps(result.model_dump(), indent=2),
            #     })
            # else:
            return result

        i += 1

    if max_tool_iterations is not None:
        raise Exception(f"Failed to get a valid response after {max_tool_iterations} tool calls")
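# One round of the reactor loop, roughly (illustrative values): the server answers
# with finish_reason == "tool_calls"; each call is validated, executed locally, and
# fed back as a "tool" message before re-posting, e.g.:
#
#   {"role": "assistant", "tool_calls": [{"id": "call_0", "type": "function",
#     "function": {"name": "add", "arguments": {"a": 1.0, "b": 2.0}}}]}
#   {"role": "tool", "tool_call_id": "call_0", "name": "add", "content": "add(a=1.0, b=2.0) = 3.0"}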
if __name__ == '__main__':

    class QAPair(BaseModel):
        question: str
        concise_answer: str
        justification: str

    class PyramidalSummary(BaseModel):
        title: str
        summary: str
        question_answers: Annotated[List[QAPair], MinLen(2)]
        sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]

    # print("# Summary\n", create_completion(
    #     model="...",
    #     response_model=PyramidalSummary,
    #     messages=[{
    #         "role": "user",
    #         "content": f"""
    #             You are a highly efficient corporate document summarizer.
    #             Create a pyramidal summary of an imaginary internal document about our company processes
    #             (starting high-level, going down to each sub sections).
    #             Keep questions short, and answers even shorter (trivia / quizz style).
    #         """
    #     }]))

    import math

    def eval_python_expression(expr: str) -> float:
        """
        Evaluate a Python expression reliably.
        This can be used to compute complex nested mathematical expressions, or any python, really.
        """
        print("# Evaluating expression: ", expr)
        return "0.0"

    def add(a: float, b: float) -> float:
        """
        Add a and b reliably.
        Don't use this tool to compute the square of a number (use multiply or pow instead)
        """
        return a + b

    # def say(something: str) -> str:
    #     """
    #     Just says something. Used to say each thought out loud
    #     """
    #     return subprocess.check_call(["say", something])

    def multiply(a: float, b: float) -> float:
        """Multiply a with b reliably"""
        return a * b

    def divide(a: float, b: float) -> float:
        """Divide a by b reliably"""
        return a / b

    def pow(value: float, power: float) -> float:
        """
        Raise a value to a power (exponent) reliably.
        The square of x is pow(x, 2), its cube is pow(x, 3), etc.
        """
        return math.pow(value, power)
    result = create_completion2(
        model="...",
        response_model=str,
        tools=[add, multiply, divide, pow], #, say],#, eval_python_expression],
        # tools=[eval_python_expression],
        temperature=0.0,
        # repetition_penalty=1.0,
        n_predict=1000,
        top_k=1,
        top_p=0.0,
        # logit_bias={
        #     i: 10.0
        #     for i in range(1, 259)
        # },
        messages=[{
            # "role": "system",
            # "content": f"""
            #     You are a reliable assistant. You think step by step and think before using tools
            # """
            # }, {
            "role": "user",
            # "content": f"""
            #     What is 10 squared?
            # """
            "content": f"""
                What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?

                Keep your goal in mind at every step.
            """
            # Think step by step, start expressing the problem as an arithmetic expression
        }])

    # result = create_completion(
    #     model="...",
    #     response_model=float,
    #     tools=[add, multiply, divide, pow], #, say],#, eval_python_expression],
    #     temperature=0.0,
    #     # logit_bias={
    #     #     i: 10.0
    #     #     for i in range(1, 259)
    #     # },
    #     messages=[{
    #         "role": "user",
    #         # "content": f"""
    #         #     What is 10 squared?
    #         # """
    #         "content": f"""
    #             What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?
    #         """
    #         # Think step by step, start expressing the problem as an arithmetic expression
    #     }])

    # 💭 First, I need to square the number 2535. For this, I will use the 'pow' tool.
    # ⚙️ pow(args={'value': 2535.0, 'power': 2.0})-> 6426225.0
    # 💭 Now that I have the square of 2535, I need to add it to 32222000403.0 and store the result.
    # ⚙️ add(args={'a': 6426225.0, 'b': 32222000403.0})-> 32228426628.0
    # 💭 Now that I have the sum of 2535 squared and 32222000403, I need to multiply it by 1.5.
    # ⚙️ pow(args={'value': 32228426628.0, 'power': 1.5})-> 5785736571757004.0
    # 💭 Now that I have the result of the sum multiplied by 1.5, I need to divide it by 3 to get a third of the result.
    # ⚙️ divide(args={'a': 5785736571757004.0, 'b': 3.0})-> 1928578857252334.8
    # 💭 I have now calculated a third of the result, which is 1928578857252334.8. I can now share this as the final answer.
    # Result: 1928578857252334.8

    expected_result = (2535 ** 2 + 32222000403) * 1.5 / 3.0
    print("➡️", result)
    assert math.fabs(result - expected_result) < 0.0001, f"Expected {expected_result}, got {result}"
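    # Worked out by hand: 2535**2 = 6426225; + 32222000403 = 32228426628;
    # * 1.5 = 48342639942.0; / 3 = 16114213314.0, which is what expected_result
    # evaluates to (note the transcript above used pow(x, 1.5) rather than * 1.5).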
@@ -16,9 +16,9 @@ echo "# Starting the server" >&2
args=(
    # --cpp_server_endpoint "http://localhost:8081"

-   --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
+   # --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf

-   # --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q8_0.gguf
+   --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q8_0.gguf
    # --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf

    # --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q8_0.gguf
@@ -31,33 +31,8 @@ sleep 5

echo "# Send a message to the chat API" >&2

-# curl http://localhost:8080/v1/chat/completions \
-#     -H "Content-Type: application/json" \
-#     -H "Authorization: Bearer $OPENAI_API_KEY" \
-#     -d '{
-#         "model": "gpt-3.5-turbo",
-#         "tools": [{
-#             "type": "function",
-#             "function": {
-#                 "name": "get_current_weather",
-#                 "description": "Get the current weather",
-#                 "parameters": {
-#                     "type": "object",
-#                     "properties": {
-#                         "location": {
-#                             "type": "string",
-#                             "description": "The city and state, e.g. San Francisco, CA"
-#                         }
-#                     },
-#                     "required": ["location"]
-#                 }
-#             }
-#         }],
-#         "messages": [
-#             {"role": "user", "content": "I live in the UK. what is the weather going to be like in San Francisco and Glasgow over the next 4 days."}
-#         ]
-#     }' | \
-#     jq .
+python -m examples.openai.reactor
+exit

curl http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \