From 253b68d9a7072342c15b775bb971177526712066 Mon Sep 17 00:00:00 2001 From: ochafik Date: Fri, 29 Mar 2024 03:24:29 +0000 Subject: [PATCH] server.py: crude reactor --- examples/openai/api.py | 4 + examples/openai/prompting.py | 31 +++- examples/openai/reactor.py | 344 +++++++++++++++++++++++++++++++++++ examples/openai/test.sh | 33 +--- 4 files changed, 373 insertions(+), 39 deletions(-) create mode 100644 examples/openai/reactor.py diff --git a/examples/openai/api.py b/examples/openai/api.py index 7c4a446b8..98d710d9c 100644 --- a/examples/openai/api.py +++ b/examples/openai/api.py @@ -10,8 +10,12 @@ class ToolCall(BaseModel): type: Literal["function"] = "function" function: FunctionCall +ToolCallsTypeAdapter = TypeAdapter(list[ToolCall]) + class Message(BaseModel): role: str + name: Optional[str] = None + tool_call_id: Optional[str] = None content: Optional[str] tool_calls: Optional[list[ToolCall]] = None diff --git a/examples/openai/prompting.py b/examples/openai/prompting.py index 60dab69ba..8657861a1 100644 --- a/examples/openai/prompting.py +++ b/examples/openai/prompting.py @@ -41,8 +41,10 @@ class ChatTemplate(BaseModel): if "<|recipient|>' + tool_call['function']['name']" in template: self._tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2 else: - self._tool_style = ToolsPromptStyle.TOOLS_BESPOKE - # self._tool_style = ToolsPromptStyle.TOOLS_LONG + # self._tool_style = ToolsPromptStyle.TOOLS_BESPOKE + + self._tool_style = ToolsPromptStyle.TOOLS_LONG + # self._tool_style = ToolsPromptStyle.TOOLS_MISTRAL # TODO: Test whether the template supports formatting tool_calls @@ -87,6 +89,8 @@ class ChatTemplate(BaseModel): eos_token = tokens[metadata[Keys.Tokenizer.EOS_ID]]) def render(self, messages: list[Message], add_generation_prompt: bool, omit_bos: bool = False): + sys.stderr.write(f'# strict_user_assistant_alternation={self._strict_user_assistant_alternation}\n') + sys.stderr.write(f'# messages=' + "\n".join(json.dumps(m.model_dump(), indent=2) for m in messages) + '\n') if self._strict_user_assistant_alternation and any(m.role not in ('user', 'assistant') for m in messages): new_messages=[] i = 0 @@ -106,6 +110,12 @@ class ChatTemplate(BaseModel): content=f'{messages[i].content}\n{tc}' )) i += 1 + elif messages[i].role == 'tool': + new_messages.append(Message( + role="user", + content=f'TOOL(name={messages[i].name}, id={messages[i].tool_call_id}): {messages[i].content}', + )) + i += 1 else: new_messages.append(messages[i]) i += 1 @@ -408,12 +418,13 @@ class FunctionaryToolsChatHandler(ChatHandler): content = '\n'.join(text_content).strip() return Message(role="assistant", content=content if content else None, tool_calls=tool_calls if tool_calls else None) -def _make_bespoke_schema(response_schema, tool_call_schema): +def _make_bespoke_schema(response_schema, tool_call_schema, allow_parallel_calls=False): return { "type": "object", "properties": { - # "original_goal": {"title": "Original Goal", "type": "string"}, - "thought": { + "original_goal": {"title": "Original Goal", "type": "string"}, + "thought_about_next_step_only": { + "title": "Thought about next step", # "title": "Thought about how the next step brings us closer to achieving the original goal", "type": "string" }, @@ -421,14 +432,14 @@ def _make_bespoke_schema(response_schema, tool_call_schema): "title": "Next Step: either a result or one or more tool calls to achieve the original goal", "oneOf": [ { - "title": "Tool Calls", + # "title": "Tool Calls", "properties": { # "type": { # "const": "tool_calls" # }, 
"tool_calls": { - "type": "array", - "items": tool_call_schema + "prefixItems": tool_call_schema if allow_parallel_calls \ + else [tool_call_schema], } }, "required": ["tool_calls"] @@ -443,7 +454,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema): ] }, }, - "required": ["original_goal", "thought", "next_step"] + "required": ["original_goal", "thought_about_next_step_only", "next_step"] } class BespokeToolsChatHandler(ChatHandler): @@ -516,7 +527,7 @@ class BespokeToolsChatHandler(ChatHandler): elif 'tool_calls' in next_step: return Message( role="assistant", - content=data["thought"], + content=data["thought_about_next_step_only"], tool_calls=[ ToolCall(id=gen_callid(), function=FunctionCall(**tc)) for tc in next_step['tool_calls'] diff --git a/examples/openai/reactor.py b/examples/openai/reactor.py new file mode 100644 index 000000000..7aae066eb --- /dev/null +++ b/examples/openai/reactor.py @@ -0,0 +1,344 @@ +# Usage: +#! ./server -m some-model.gguf & +#! pip install pydantic +#! python examples/json-schema-pydantic-example.py +# +# TODO: +# - https://github.com/NousResearch/Hermes-Function-Calling +# +# <|im_start|>system +# You are a function calling AI model. You are provided with function signatures within XML tags +# You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: +# {'type': 'function', 'function': {'name': 'get_stock_fundamentals', +# 'description': 'get_stock_fundamentals(symbol: str) -> dict - Get fundamental data for a given stock symbol using yfinance API.\n\n Args:\n symbol (str): The stock symbol.\n\n Returns:\n dict: A dictionary containing fundamental data.', 'parameters': {'type': 'object', 'properties': {'symbol': {'type': 'string'}}, 'required': ['symbol']}}} +# Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within XML tags as follows: +# +# {'arguments': , 'name': } +# <|im_end|> + +from dataclasses import dataclass +import subprocess +import sys +from pydantic import BaseModel, TypeAdapter +from annotated_types import MinLen +from typing import Annotated, Callable, List, Union, Literal, Optional, Type, get_args, get_origin +import json, requests + +from examples.openai.api import ToolCallsTypeAdapter + +def type_to_str(t): + origin = get_origin(t) + if origin is None: + return t.__name__ + args = get_args(t) + return origin.__name__ + ( + f'[{", ".join(type_to_str(a) for a in args)}]' if args else '' + ) + +def build_union_type_adapter(*types): + src = '\n'.join([ + 'from pydantic import TypeAdapter', + 'from typing import Union', + f'_out = TypeAdapter(Union[{", ".join(type_to_str(t) for t in types)}])', + ]) + globs = { + **globals(), + **{t.__name__: t for t in types}, + } + exec(src, globs) + return globs['_out'] + +class Thought(BaseModel): + thought: str + + +def build_tool_call_adapter2(final_output_type, *tools): + lines = [ + 'from pydantic import BaseModel, TypeAdapter', + 'from typing import Literal, Union', + ] + globs = { + **globals(), + **locals(), + final_output_type.__name__: final_output_type, + } + tool_calls = [] + for fn in tools: + #Β TODO: escape fn.__doc__ and fn.__doc__ to avoid comment or metadata injection! 
+        fn_name = fn.__name__
+        fn_doc = fn.__doc__.replace('"""', "'''") if fn.__doc__ else None
+        name = fn_name.replace('_', ' ').title().replace(' ', '')
+        lines += [
+            f'class {name}ToolArgs(BaseModel):',
+            *(f'    {k}: {type_to_str(v)}' for k, v in fn.__annotations__.items() if k != 'return'),
+            f'class {name}ToolCall(BaseModel):',
+            *([f'    """{fn_doc}"""'] if fn_doc else []),
+            f'    name: Literal["{fn_name}"]',
+            f'    arguments: {name}ToolArgs',
+            f'class {name}Tool(BaseModel):',
+            # *([f'    """{fn_doc}"""'] if fn_doc else []),
+            f'    id: str',
+            f'    type: Literal["function"]',
+            f'    function: {name}ToolCall',
+            f'    def __call__(self) -> {type_to_str(fn.__annotations__.get("return"))}:',
+            f'        return {fn_name}(**self.function.arguments.model_dump())',
+        ]
+        tool_calls.append(f'{name}Tool')
+
+    lines += [
+        # 'class FinalResult(BaseModel):',
+        # f'    result: {type_to_str(final_output_type)}',
+        # 'class Response(BaseModel):',
+        # f'    """A response that starts with a thought about whether we need tools or not, the plan about tool usage (maybe a sequence of tool calls), and then either a final result (of type {final_output_type.__name__}) or a first tool call"""',
+        # f'    original_goal: str',
+        # f'    thought_process: str',
+        # # f'    thought: str',
+        # f'    next_step: Union[FinalResult, {", ".join(tool_calls)}]',
+        # f'response_adapter = TypeAdapter(Response)'
+        f'response_adapter = TypeAdapter(Union[{", ".join(tool_calls)}])',
+    ]
+
+    exec('\n'.join(lines), globs)
+    return globs['response_adapter']
+
+def create_completion2(*, response_model=None, max_tool_iterations=None, tools=[], endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
+    '''
+    Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
+    (llama.cpp server, llama-cpp-python, Anyscale / Together...)
+
+    The response_model param takes a type (+ supports Pydantic) and behaves just as it does w/ Instructor
+    '''
+    # These must be defined even when no response_model is requested:
+    # both are used unconditionally below.
+    type_adapter = None
+    response_format = None
+    if response_model:
+        type_adapter = TypeAdapter(response_model)
+        schema = type_adapter.json_schema()
+        # messages = [{
+        #     "role": "system",
+        #     "content": f"Respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
+        # }] + messages
+        # print("Completion: ", json.dumps(messages, indent=2))
+        # print("SCHEMA: " + json.dumps(schema, indent=2))
+        response_format = {"type": "json_object", "schema": schema}
+
+    tool_call_adapter = build_tool_call_adapter2(response_model, *tools)
+    tool_adapters = [(fn, TypeAdapter(fn)) for fn in tools]
+    tools_schemas = [{
+        "type": "function",
+        "function": {
+            "name": fn.__name__,
+            "description": fn.__doc__,
+            "parameters": ta.json_schema()
+        }
+    } for (fn, ta) in tool_adapters]
+
+    # messages = [{
+    #     "role": "system",
+    #     "content": '\n'.join([
+    #         # "You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.",
+    #         # "You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:",
+    #         # f'{json.dumps(tools_schemas)}',
+    #         'Before calling each tool, you think clearly and briefly about why and how you are using the tool.',
+    #         f"Respond in JSON format with the following schema: {json.dumps(schema, indent=2)}" if schema else "",
+    #     ])
+    # }] + messages
+
+    i = 0
+    while (max_tool_iterations is None or i < max_tool_iterations):
+        body = dict(
+            messages=messages,
+            response_format=response_format,
+            tools=tools_schemas,
+            **kwargs
+        )
+        # sys.stderr.write(f'# REQUEST: {json.dumps(body, indent=2)}\n')
+        response = requests.post(
+            endpoint,
+            headers={"Content-Type": "application/json"},
+            json=body,
+        )
+        if response.status_code != 200:
+            raise Exception(f"Request failed ({response.status_code}): {response.text}")
+
+        # sys.stderr.write(f"\n# RESPONSE:\n\n<<<{response.text}>>>\n\n")
+        data = response.json()
+        if 'error' in data:
+            raise Exception(data['error']['message'])
+
+        # sys.stderr.write(f"\n# RESPONSE DATA:\n\n{json.dumps(data, indent=2)}\n\n")
+        # print(json.dumps(data, indent=2))
+        choice = data["choices"][0]
+
+        content = choice["message"].get("content")
+        if choice.get("finish_reason") == "tool_calls":
+            # sys.stderr.write(f'\n# TOOL CALLS:\n{json.dumps(choice["message"]["tool_calls"], indent=2)}\n\n')
+            # tool_calls = ToolCallsTypeAdapter.validate_json(json.dumps(choice["tool_calls"]))
+            messages.append(choice["message"])
+            # Print the model's thought once, not once per tool call.
+            if content:
+                print(f'💭 {content}')
+            for tool_call in choice["message"]["tool_calls"]:
+                # id = tool_call.get("id")
+                # if id:
+                #     del tool_call["id"]
+
+                tc = tool_call_adapter.validate_json(json.dumps(tool_call))
+
+                pretty_call = f'{tc.function.name}({", ".join(f"{k}={v}" for k, v in tc.function.arguments.model_dump().items())})'
+                sys.stdout.write(f'⚙️  {pretty_call}')
+                result = tc()
+                sys.stdout.write(f" -> {result}\n")
+                messages.append({
+                    "tool_call_id": tc.id,
+                    "role": "tool",
+                    "name": tc.function.name,
+                    # "content": f'{result}',
+                    "content": f'{pretty_call} = {result}',
+                })
+        else:
+            assert content
+            # print(content)
+            # print(json.dumps(json.loads(content), indent=2))
+            result = type_adapter.validate_json(content) if type_adapter else content
+            # if isinstance(result, Thought):
+            #     print(f'💭 {result.thought}')
+            #     messages.append({
+            #         "role": "assistant",
+            #         "content": json.dumps(result.model_dump(), indent=2),
+            #     })
+            # else:
+            return result
+
+        i += 1
+
+    if max_tool_iterations is not None:
+        raise Exception(f"Failed to get a valid response after {max_tool_iterations} tool calls")
+
+if __name__ == '__main__':
+
+    class QAPair(BaseModel):
+        question: str
+        concise_answer: str
+        justification: str
+
+    class PyramidalSummary(BaseModel):
+        title: str
+        summary: str
+        question_answers: Annotated[List[QAPair], MinLen(2)]
+        sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]] = None
+
+    # print("# Summary\n", create_completion(
+    #     model="...",
+    #     response_model=PyramidalSummary,
+    #     messages=[{
+    #         "role": "user",
+    #         "content": f"""
+    #             You are a highly efficient corporate document summarizer.
+    #             Create a pyramidal summary of an imaginary internal document about our company processes
+    #             (starting high-level, going down into each sub-section).
+    #             Keep questions short, and answers even shorter (trivia / quiz style).
+    #         """
+    #     }]))
+
+    import math
+
+    def eval_python_expression(expr: str) -> float:
+        """
+        Evaluate a Python expression reliably.
+        This can be used to compute complex nested mathematical expressions, or any Python, really.
+        """
+        print("# Evaluating expression: ", expr)
+        return 0.0
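+
+    # The stub above deliberately returns a constant. A minimal sketch of a
+    # real, restricted, arithmetic-only evaluator (an assumption, not part of
+    # the original patch) could walk the AST instead of calling eval():
+    #
+    #   import ast, operator
+    #   def safe_eval(expr: str) -> float:
+    #       ops = {ast.Add: operator.add, ast.Sub: operator.sub,
+    #              ast.Mult: operator.mul, ast.Div: operator.truediv,
+    #              ast.Pow: operator.pow, ast.USub: operator.neg}
+    #       def ev(n):
+    #           if isinstance(n, ast.Constant): return float(n.value)
+    #           if isinstance(n, ast.BinOp): return ops[type(n.op)](ev(n.left), ev(n.right))
+    #           if isinstance(n, ast.UnaryOp): return ops[type(n.op)](ev(n.operand))
+    #           raise ValueError(f'unsupported node: {type(n).__name__}')
+    #       return ev(ast.parse(expr, mode='eval').body)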
+
+    def add(a: float, b: float) -> float:
+        """
+            Add a and b reliably.
+            Don't use this tool to compute the square of a number (use multiply or pow instead)
+        """
+        return a + b
+
+    # def say(something: str) -> str:
+    #     """
+    #         Just says something. Used to say each thought out loud
+    #     """
+    #     return subprocess.check_call(["say", something])
+
+    def multiply(a: float, b: float) -> float:
+        """Multiply a with b reliably"""
+        return a * b
+
+    def divide(a: float, b: float) -> float:
+        """Divide a by b reliably"""
+        return a / b
+
+    def pow(value: float, power: float) -> float:
+        """
+            Raise a value to a power (exponent) reliably.
+            The square of x is pow(x, 2), its cube is pow(x, 3), etc.
+        """
+        return math.pow(value, power)
+
+    result = create_completion2(
+        model="...",
+        response_model=str,
+        tools=[add, multiply, divide, pow], #, say],#, eval_python_expression],
+        # tools=[eval_python_expression],
+        temperature=0.0,
+        # repetition_penalty=1.0,
+        n_predict=1000,
+        top_k=1,
+        top_p=0.0,
+        # logit_bias={
+        #     i: 10.0
+        #     for i in range(1, 259)
+        # },
+        messages=[{
+            # "role": "system",
+            # "content": f"""
+            #     You are a reliable assistant. You think step by step and think before using tools.
+            # """
+        # }, {
+            "role": "user",
+            # "content": f"""
+            #     What is 10 squared?
+            # """
+            "content": f"""
+                What is the sum of 2535 squared and 32222000403, then multiplied by one and a half? And what's a third of the result?
+
+                Keep your goal in mind at every step.
+            """
+            # Think step by step, start expressing the problem as an arithmetic expression
+        }])
+
+    # result = create_completion(
+    #     model="...",
+    #     response_model=float,
+    #     tools=[add, multiply, divide, pow], #, say],#, eval_python_expression],
+    #     temperature=0.0,
+    #     # logit_bias={
+    #     #     i: 10.0
+    #     #     for i in range(1, 259)
+    #     # },
+    #     messages=[{
+    #         "role": "user",
+    #         # "content": f"""
+    #         #     What is 10 squared?
+    #         # """
+    #         "content": f"""
+    #             What is the sum of 2535 squared and 32222000403, then multiplied by one and a half? And what's a third of the result?
+    #         """
+    #         # Think step by step, start expressing the problem as an arithmetic expression
+    #     }])
+
+    # 💭 First, I need to square the number 2535. For this, I will use the 'pow' tool.
+    # ⚙️  pow(args={'value': 2535.0, 'power': 2.0}) -> 6426225.0
+    # 💭 Now that I have the square of 2535, I need to add it to 32222000403.0 and store the result.
+    # ⚙️  add(args={'a': 6426225.0, 'b': 32222000403.0}) -> 32228426628.0
+    # 💭 Now that I have the sum of 2535 squared and 32222000403, I need to multiply it by 1.5.
+    # ⚙️  pow(args={'value': 32228426628.0, 'power': 1.5}) -> 5785736571757004.0
+    # 💭 Now that I have the result of the sum multiplied by 1.5, I need to divide it by 3 to get a third of the result.
+    # ⚙️  divide(args={'a': 5785736571757004.0, 'b': 3.0}) -> 1928578857252334.8
+    # 💭 I have now calculated a third of the result, which is 1928578857252334.8. I can now share this as the final answer.
+    # Result: 1928578857252334.8
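+
+    # Note that the trace above went wrong at its third step: the model called
+    # pow(x, 1.5) where a multiplication by 1.5 was intended, so its final
+    # figure differs from the correct value checked below:
+    #   2535 ** 2             = 6426225.0
+    #   6426225 + 32222000403 = 32228426628.0
+    #   32228426628 * 1.5     = 48342639942.0
+    #   48342639942 / 3       = 16114213314.0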
+
+    expected_result = (2535 ** 2 + 32222000403) * 1.5 / 3.0
+    print("➡️", result)
+    # result is a str (response_model=str above), so coerce it before comparing.
+    assert math.fabs(float(result) - expected_result) < 0.0001, f"Expected {expected_result}, got {result}"
diff --git a/examples/openai/test.sh b/examples/openai/test.sh
index 44a6c44de..4dca39ade 100755
--- a/examples/openai/test.sh
+++ b/examples/openai/test.sh
@@ -16,9 +16,9 @@ echo "# Starting the server" >&2
 
 args=(
     # --cpp_server_endpoint "http://localhost:8081"
 
-    --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
+    # --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
 
-    # --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q8_0.gguf
+    --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q8_0.gguf
     # --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
 
     # --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q8_0.gguf
@@ -31,33 +31,8 @@ sleep 5
 
 echo "# Send a message to the chat API" >&2
 
-# curl http://localhost:8080/v1/chat/completions \
-#   -H "Content-Type: application/json" \
-#   -H "Authorization: Bearer $OPENAI_API_KEY" \
-#   -d '{
-#     "model": "gpt-3.5-turbo",
-#     "tools": [{
-#       "type": "function",
-#       "function": {
-#         "name": "get_current_weather",
-#         "description": "Get the current weather",
-#         "parameters": {
-#           "type": "object",
-#           "properties": {
-#             "location": {
-#               "type": "string",
-#               "description": "The city and state, e.g. San Francisco, CA"
-#             }
-#           },
-#           "required": ["location"]
-#         }
-#       }
-#     }],
-#     "messages": [
-#       {"role": "user", "content": "I live in the UK. what is the weather going to be like in San Francisco and Glasgow over the next 4 days."}
-#     ]
-#   }' | \
-#   jq .
+python -m examples.openai.reactor
+exit
 
 curl http://localhost:8080/v1/chat/completions \
   -H "Content-Type: application/json" \