server.py: crude reactor

ochafik 2024-03-29 03:24:29 +00:00
parent 59b411406f
commit 253b68d9a7
4 changed files with 373 additions and 39 deletions


@@ -10,8 +10,12 @@ class ToolCall(BaseModel):
type: Literal["function"] = "function"
function: FunctionCall
ToolCallsTypeAdapter = TypeAdapter(list[ToolCall])
class Message(BaseModel):
role: str
name: Optional[str] = None
tool_call_id: Optional[str] = None
content: Optional[str]
tool_calls: Optional[list[ToolCall]] = None
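
For illustration (invented values): a tool-result message under this extended model looks like Message(role="tool", name="add", tool_call_id="call_0", content="3.0"), which the render() change below folds into a plain user message for templates that enforce strict user/assistant alternation.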


@@ -41,8 +41,10 @@ class ChatTemplate(BaseModel):
if "<|recipient|>' + tool_call['function']['name']" in template:
self._tool_style = ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2
else:
-            self._tool_style = ToolsPromptStyle.TOOLS_BESPOKE
-            # self._tool_style = ToolsPromptStyle.TOOLS_LONG
+            # self._tool_style = ToolsPromptStyle.TOOLS_BESPOKE
+            self._tool_style = ToolsPromptStyle.TOOLS_LONG
+            # self._tool_style = ToolsPromptStyle.TOOLS_MISTRAL
# TODO: Test whether the template supports formatting tool_calls
@@ -87,6 +89,8 @@ class ChatTemplate(BaseModel):
eos_token = tokens[metadata[Keys.Tokenizer.EOS_ID]])
def render(self, messages: list[Message], add_generation_prompt: bool, omit_bos: bool = False):
sys.stderr.write(f'# strict_user_assistant_alternation={self._strict_user_assistant_alternation}\n')
sys.stderr.write(f'# messages=' + "\n".join(json.dumps(m.model_dump(), indent=2) for m in messages) + '\n')
if self._strict_user_assistant_alternation and any(m.role not in ('user', 'assistant') for m in messages):
new_messages=[]
i = 0
@@ -106,6 +110,12 @@ class ChatTemplate(BaseModel):
content=f'{messages[i].content}\n{tc}'
))
i += 1
+                elif messages[i].role == 'tool':
+                    new_messages.append(Message(
+                        role="user",
+                        content=f'TOOL(name={messages[i].name}, id={messages[i].tool_call_id}): {messages[i].content}',
+                    ))
+                    i += 1
else:
new_messages.append(messages[i])
i += 1
@@ -408,12 +418,13 @@ class FunctionaryToolsChatHandler(ChatHandler):
content = '\n'.join(text_content).strip()
return Message(role="assistant", content=content if content else None, tool_calls=tool_calls if tool_calls else None)
-def _make_bespoke_schema(response_schema, tool_call_schema):
+def _make_bespoke_schema(response_schema, tool_call_schema, allow_parallel_calls=False):
return {
"type": "object",
"properties": {
# "original_goal": {"title": "Original Goal", "type": "string"},
"thought": {
"original_goal": {"title": "Original Goal", "type": "string"},
"thought_about_next_step_only": {
"title": "Thought about next step",
# "title": "Thought about how the next step brings us closer to achieving the original goal",
"type": "string"
},
@@ -421,14 +432,14 @@ def _make_bespoke_schema(response_schema, tool_call_schema):
"title": "Next Step: either a result or one or more tool calls to achieve the original goal",
"oneOf": [
{
"title": "Tool Calls",
# "title": "Tool Calls",
"properties": {
# "type": {
# "const": "tool_calls"
# },
"tool_calls": {
"type": "array",
"items": tool_call_schema
"prefixItems": tool_call_schema if allow_parallel_calls \
else [tool_call_schema],
}
},
"required": ["tool_calls"]
@@ -443,7 +454,7 @@ def _make_bespoke_schema(response_schema, tool_call_schema):
]
},
},
"required": ["original_goal", "thought", "next_step"]
"required": ["original_goal", "thought_about_next_step_only", "next_step"]
}
class BespokeToolsChatHandler(ChatHandler):
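
For illustration (sketch with invented values), a model response constrained by this bespoke schema looks like:

    {
      "original_goal": "compute (2535**2 + 32222000403) * 1.5 / 3",
      "thought_about_next_step_only": "First, square 2535.",
      "next_step": {"tool_calls": [{"name": "pow", "arguments": {"value": 2535, "power": 2}}]}
    }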
@@ -516,7 +527,7 @@ class BespokeToolsChatHandler(ChatHandler):
elif 'tool_calls' in next_step:
return Message(
role="assistant",
content=data["thought"],
content=data["thought_about_next_step_only"],
tool_calls=[
ToolCall(id=gen_callid(), function=FunctionCall(**tc))
for tc in next_step['tool_calls']

examples/openai/reactor.py (new file, +344)

@@ -0,0 +1,344 @@
# Usage:
#! ./server -m some-model.gguf &
#! pip install pydantic requests
#! python -m examples.openai.reactor
#
# TODO:
# - https://github.com/NousResearch/Hermes-Function-Calling
#
# <|im_start|>system
# You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags
# You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:
# <tools> {'type': 'function', 'function': {'name': 'get_stock_fundamentals',
# 'description': 'get_stock_fundamentals(symbol: str) -> dict - Get fundamental data for a given stock symbol using yfinance API.\n\n Args:\n symbol (str): The stock symbol.\n\n Returns:\n dict: A dictionary containing fundamental data.', 'parameters': {'type': 'object', 'properties': {'symbol': {'type': 'string'}}, 'required': ['symbol']}}}
# </tools> Use the following pydantic model json schema for each tool call you will make: {'title': 'FunctionCall', 'type': 'object', 'properties': {'arguments': {'title': 'Arguments', 'type': 'object'}, 'name': {'title': 'Name', 'type': 'string'}}, 'required': ['arguments', 'name']} For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:
# <tool_call>
# {'arguments': <args-dict>, 'name': <function-name>}
# </tool_call><|im_end|>
from dataclasses import dataclass
import subprocess
import sys
from pydantic import BaseModel, TypeAdapter
from annotated_types import MinLen
from typing import Annotated, Callable, List, Union, Literal, Optional, Type, get_args, get_origin
import json, requests
from examples.openai.api import ToolCallsTypeAdapter
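# Illustrative sketch (not wired up below): one way to render the Hermes-style
# system prompt quoted in the header from OpenAI-format tool schemas.
def hermes_style_system_prompt(tools_schemas: list) -> str:
    return (
        "You are a function calling AI model. You are provided with function signatures "
        "within <tools></tools> XML tags. You may call one or more functions to assist "
        "with the user query. Don't make assumptions about what values to plug into "
        "functions. Here are the available tools: "
        f"<tools> {json.dumps(tools_schemas)} </tools> "
        "For each function call return a json object with function name and arguments "
        "within <tool_call></tool_call> XML tags."
    )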
def type_to_str(t):
origin = get_origin(t)
if origin is None:
return t.__name__
args = get_args(t)
return origin.__name__ + (
f'[{", ".join(type_to_str(a) for a in args)}]' if args else ''
)
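# e.g. type_to_str(float) == 'float'
#      type_to_str(list[float]) == 'list[float]'
#      type_to_str(dict[str, list[int]]) == 'dict[str, list[int]]'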
def build_union_type_adapter(*types):
src = '\n'.join([
'from pydantic import TypeAdapter',
'from typing import Union',
f'_out = TypeAdapter(Union[{", ".join(type_to_str(t) for t in types)}])',
])
globs = {
**globals(),
**{t.__name__: t for t in types},
}
exec(src, globs)
return globs['_out']
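# e.g. build_union_type_adapter(int, str) builds TypeAdapter(Union[int, str]).
# (Not used below; build_tool_call_adapter2 inlines the same exec-generated-source trick.)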
class Thought(BaseModel):
thought: str
def build_tool_call_adapter2(final_output_type, *tools):
lines = [
'from pydantic import BaseModel, TypeAdapter',
'from typing import Literal, Union',
]
globs = {
**globals(),
**locals(),
final_output_type.__name__: final_output_type,
}
tool_calls = []
for fn in tools:
        # TODO: escape fn.__name__ and fn.__doc__ to avoid comment or metadata injection!
fn_name = fn.__name__
fn_doc = fn.__doc__.replace('"""', "'''") if fn.__doc__ else None
name = fn_name.replace('_', ' ').title().replace(' ', '')
lines += [
f'class {name}ToolArgs(BaseModel):',
*(f' {k}: {type_to_str(v)}' for k, v in fn.__annotations__.items() if k != 'return'),
f'class {name}ToolCall(BaseModel):',
*([f' """{fn_doc}"""'] if fn_doc else []),
f' name: Literal["{fn_name}"]',
f' arguments: {name}ToolArgs',
f'class {name}Tool(BaseModel):',
# *([f' """{fn_doc}"""'] if fn_doc else []),
f' id: str',
f' type: Literal["function"]',
f' function: {name}ToolCall',
f' def __call__(self) -> {type_to_str(fn.__annotations__.get("return"))}:',
            f'        return {fn_name}(**self.function.arguments.model_dump())',
]
tool_calls.append(f'{name}Tool')
lines += [
# 'class FinalResult(BaseModel):',
# f' result: {type_to_str(final_output_type)}',
# 'class Response(BaseModel):',
# f' """A response that starts with a thought about whether we need tools or not, the plan about tool usage (maybe a sequence of tool calls), and then either a final result (of type {final_output_type.__name__}) or a first tool call"""',
# f' original_goal: str',
# f' thought_process: str',
# # f' thought: str',
# f' next_step: Union[FinalResult, {", ".join(tool_calls)}]',
# f'response_adapter = TypeAdapter(Response)'
f'response_adapter = TypeAdapter(Union[{", ".join(tool_calls)}])',
]
exec('\n'.join(lines), globs)
return globs['response_adapter']
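# For a tool `def add(a: float, b: float) -> float`, the source exec'd above is
# roughly equivalent to:
#
#   class AddToolArgs(BaseModel):
#       a: float
#       b: float
#   class AddToolCall(BaseModel):
#       """Add a and b reliably. ..."""
#       name: Literal["add"]
#       arguments: AddToolArgs
#   class AddTool(BaseModel):
#       id: str
#       type: Literal["function"]
#       function: AddToolCall
#       def __call__(self) -> float:
#           return add(**self.function.arguments.model_dump())
#   response_adapter = TypeAdapter(Union[AddTool])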
def create_completion2(*, response_model=None, max_tool_iterations=None, tools=[], endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
'''
Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
(llama.cpp server, llama-cpp-python, Anyscale / Together...)
    The response_model param takes a type (+ supports Pydantic) and behaves just as it does with Instructor
'''
    type_adapter = None
    response_format = None
    if response_model:
        type_adapter = TypeAdapter(response_model)
        schema = type_adapter.json_schema()
# messages = [{
# "role": "system",
# "content": f"Respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
# }] + messages
# print("Completion: ", json.dumps(messages, indent=2))
# print("SCHEMA: " + json.dumps(schema, indent=2))
response_format={"type": "json_object", "schema": schema }
tool_call_adapter = build_tool_call_adapter2(response_model, *tools)
tool_adapters = [(fn, TypeAdapter(fn)) for fn in tools]
tools_schemas = [{
"type": "function",
"function": {
"name": fn.__name__,
"description": fn.__doc__,
"parameters": ta.json_schema()
}
} for (fn, ta) in tool_adapters]
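    # For illustration (rough sketch): the entry generated for `add` looks like
    #   {"type": "function",
    #    "function": {"name": "add",
    #                 "description": "Add a and b reliably. ...",
    #                 "parameters": {"type": "object",
    #                                "properties": {"a": {"type": "number"}, "b": {"type": "number"}},
    #                                "required": ["a", "b"]}}}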
# messages = [{
# "role": "system",
# "content": '\n'.join([
# # "You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags.",
# # "You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools:",
# # f'<tools>{json.dumps(tools_schemas)}</tools>',
# 'Before calling each tool, you think clearly and briefly about why and how you are using the tool.',
# f"Respond in JSON format with the following schema: {json.dumps(schema, indent=2)}" if schema else "",
# ])
# }] + messages
i = 0
while (max_tool_iterations is None or i < max_tool_iterations):
body=dict(
messages=messages,
response_format=response_format,
tools=tools_schemas,
**kwargs
)
# sys.stderr.write(f'# REQUEST: {json.dumps(body, indent=2)}\n')
response = requests.post(
endpoint,
headers={"Content-Type": "application/json"},
json=body,
)
if response.status_code != 200:
raise Exception(f"Request failed ({response.status_code}): {response.text}")
# sys.stderr.write(f"\n# RESPONSE:\n\n<<<{response.text}>>>\n\n")
data = response.json()
if 'error' in data:
raise Exception(data['error']['message'])
# sys.stderr.write(f"\n# RESPONSE DATA:\n\n{json.dumps(data, indent=2)}\n\n")
# print(json.dumps(data, indent=2))
choice = data["choices"][0]
content = choice["message"].get("content")
if choice.get("finish_reason") == "tool_calls":
# sys.stderr.write(f'\n# TOOL CALLS:\n{json.dumps(choice["message"]["tool_calls"], indent=2)}\n\n')
# tool_calls =ToolCallsTypeAdapter.validate_json(json.dumps(choice["tool_calls"]))
messages.append(choice["message"])
for tool_call in choice["message"]["tool_calls"]:
# id = tool_call.get("id")
# if id:
# del tool_call["id"]
if content:
print(f'💭 {content}')
tc = tool_call_adapter.validate_json(json.dumps(tool_call))
pretty_call = f'{tc.function.name}({", ".join(f"{k}={v}" for k, v in tc.function.arguments.model_dump().items())})'
sys.stdout.write(f'⚙️ {pretty_call}')
result = tc()
sys.stdout.write(f" -> {result}\n")
messages.append({
"tool_call_id": tc.id,
"role": "tool",
"name": tc.function.name,
# "content": f'{result}',
"content": f'{pretty_call} = {result}',
})
else:
assert content
# print(content)
# print(json.dumps(json.loads(content), indent=2))
result = type_adapter.validate_json(content) if type_adapter else content
# if isinstance(result, Thought):
# print(f'💭 {result.thought}')
# messages.append({
# "role": "assistant",
# "content": json.dumps(result.model_dump(), indent=2),
# })
# else:
return result
i += 1
if max_tool_iterations is not None:
raise Exception(f"Failed to get a valid response after {max_tool_iterations} tool calls")
if __name__ == '__main__':
class QAPair(BaseModel):
question: str
concise_answer: str
justification: str
class PyramidalSummary(BaseModel):
title: str
summary: str
question_answers: Annotated[List[QAPair], MinLen(2)]
        sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]] = None
# print("# Summary\n", create_completion(
# model="...",
# response_model=PyramidalSummary,
# messages=[{
# "role": "user",
# "content": f"""
# You are a highly efficient corporate document summarizer.
# Create a pyramidal summary of an imaginary internal document about our company processes
# (starting high-level, going down to each sub sections).
# Keep questions short, and answers even shorter (trivia / quizz style).
# """
# }]))
import math
    def eval_python_expression(expr: str) -> float:
        """
        Evaluate a Python expression reliably.
        This can be used to compute complex nested mathematical expressions, or any python, really.
        """
        # NB: stub, just logs the expression and returns 0.0 (left disabled in the tools list below)
        print("# Evaluating expression: ", expr)
        return 0.0
def add(a: float, b: float) -> float:
"""
Add a and b reliably.
Don't use this tool to compute the square of a number (use multiply or pow instead)
"""
return a + b
# def say(something: str) -> str:
# """
# Just says something. Used to say each thought out loud
# """
# return subprocess.check_call(["say", something])
def multiply(a: float, b: float) -> float:
"""Multiply a with b reliably"""
return a * b
def divide(a: float, b: float) -> float:
"""Divide a by b reliably"""
return a / b
def pow(value: float, power: float) -> float:
"""
Raise a value to a power (exponent) reliably.
The square of x is pow(x, 2), its cube is pow(x, 3), etc.
"""
return math.pow(value, power)
result = create_completion2(
model="...",
response_model=str,
tools=[add, multiply, divide, pow], #, say],#, eval_python_expression],
# tools=[eval_python_expression],
temperature=0.0,
# repetition_penalty=1.0,
n_predict=1000,
top_k=1,
top_p=0.0,
# logit_bias={
# i: 10.0
# for i in range(1, 259)
# },
messages=[{
# "role": "system",
# "content": f"""
# You are a reliable assistant. You think step by step and think before using tools
# """
# }, {
"role": "user",
# "content": f"""
# What is 10 squared?
# """
"content": f"""
What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?
Keep your goal in mind at every step.
"""
# Think step by step, start expressing the problem as an arithmetic expression
}])
# result = create_completion(
# model="...",
# response_model=float,
# tools=[add, multiply, divide, pow], #, say],#, eval_python_expression],
# temperature=0.0,
# # logit_bias={
# # i: 10.0
# # for i in range(1, 259)
# # },
# messages=[{
# "role": "user",
# # "content": f"""
# # What is 10 squared?
# # """
# "content": f"""
# What is the sum of 2535 squared and 32222000403 then multiplied by one and a half. What's a third of the result?
# """
# # Think step by step, start expressing the problem as an arithmetic expression
# }])
# 💭 First, I need to square the number 2535. For this, I will use the 'pow' tool.
# ⚙️ pow(args={'value': 2535.0, 'power': 2.0})-> 6426225.0
# 💭 Now that I have the square of 2535, I need to add it to 32222000403.0 and store the result.
# ⚙️ add(args={'a': 6426225.0, 'b': 32222000403.0})-> 32228426628.0
# 💭 Now that I have the sum of 2535 squared and 32222000403, I need to multiply it by 1.5.
# ⚙️ pow(args={'value': 32228426628.0, 'power': 1.5})-> 5785736571757004.0
# 💭 Now that I have the result of the sum multiplied by 1.5, I need to divide it by 3 to get a third of the result.
# ⚙️ divide(args={'a': 5785736571757004.0, 'b': 3.0})-> 1928578857252334.8
# 💭 I have now calculated a third of the result, which is 1928578857252334.8. I can now share this as the final answer.
# Result: 1928578857252334.8
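# NB: in the sample run above the model reached for pow(..., 1.5) where the prompt
# asked to multiply by one and a half, so that run's result would not match the
# expected_result checked below.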
expected_result = (2535 ** 2 + 32222000403) * 1.5 / 3.0
print("➡️", result)
    assert math.fabs(float(result) - expected_result) < 0.0001, f"Expected {expected_result}, got {result}"


@@ -16,9 +16,9 @@ echo "# Starting the server" >&2
args=(
# --cpp_server_endpoint "http://localhost:8081"
-  --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
+  # --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
-  # --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q8_0.gguf
+  --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q8_0.gguf
# --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
# --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q8_0.gguf
@@ -31,33 +31,8 @@ sleep 5
echo "# Send a message to the chat API" >&2
-# curl http://localhost:8080/v1/chat/completions \
-#   -H "Content-Type: application/json" \
-#   -H "Authorization: Bearer $OPENAI_API_KEY" \
-#   -d '{
-#     "model": "gpt-3.5-turbo",
-#     "tools": [{
-#       "type": "function",
-#       "function": {
-#         "name": "get_current_weather",
-#         "description": "Get the current weather",
-#         "parameters": {
-#           "type": "object",
-#           "properties": {
-#             "location": {
-#               "type": "string",
-#               "description": "The city and state, e.g. San Francisco, CA"
-#             }
-#           },
-#           "required": ["location"]
-#         }
-#       }
-#     }],
-#     "messages": [
-#       {"role": "user", "content": "I live in the UK. what is the weather going to be like in San Francisco and Glasgow over the next 4 days."}
-#     ]
-#   }' | \
-#     jq .
+python -m examples.openai.reactor
+exit
curl http://localhost:8080/v1/chat/completions \
-H "Content-Type: application/json" \