diff --git a/examples/openai/api.py b/examples/openai/api.py index 0d7ddc111..dd8da09a2 100644 --- a/examples/openai/api.py +++ b/examples/openai/api.py @@ -1,5 +1,5 @@ from typing import Any, Dict, Literal, Optional, Union -from pydantic import BaseModel, Json +from pydantic import BaseModel, Json, TypeAdapter class FunctionCall(BaseModel): name: str @@ -31,10 +31,33 @@ class ResponseFormat(BaseModel): class ChatCompletionRequest(BaseModel): model: str tools: Optional[list[Tool]] = None - messages: list[Message] + messages: Optional[list[Message]] = None + prompt: Optional[str] = None response_format: Optional[ResponseFormat] = None - temperature: float = 1.0 + stream: bool = False + cache_prompt: Optional[bool] = None + n_predict: Optional[int] = None + top_k: Optional[int] = None + top_p: Optional[float] = None + min_p: Optional[float] = None + tfs_z: Optional[float] = None + typical_p: Optional[float] = None + temperature: float = 1.0 + dynatemp_range: Optional[float] = None + dynatemp_exponent: Optional[float] = None + repeat_last_n: Optional[int] = None + repeat_penalty: Optional[float] = None + frequency_penalty: Optional[float] = None + presence_penalty: Optional[float] = None + mirostat: Optional[bool] = None + mirostat_tau: Optional[float] = None + mirostat_eta: Optional[float] = None + penalize_nl: Optional[bool] = None + n_keep: Optional[int] = None + seed: Optional[int] = None + n_probs: Optional[int] = None + min_keep: Optional[int] = None class Choice(BaseModel): index: int diff --git a/examples/openai/prompting.py b/examples/openai/prompting.py index ab3ce89a8..e26ca9229 100644 --- a/examples/openai/prompting.py +++ b/examples/openai/prompting.py @@ -41,7 +41,7 @@ class ChatFormat: system_message = next(((i, m) for i, m in enumerate(messages) if m.role == "system"), None) if system_message is not None: (i, m) = system_message - return messages[:i] + [Message(role="system", content=m.content + '\n' + system_prompt.content)] + messages[i+1:] + return 
messages[:i] + [Message(role="system", content=system_prompt.content + '\n' + m.content)] + messages[i+1:] else: return [system_prompt] + messages @@ -63,8 +63,16 @@ class ChatFormat: assert messages[i+1].role == 'user' new_messages.append(Message( role="user", - content=f'[SYS]{messages[i].content}[/SYS]\n{messages[i+1].content}')) + content=f'[SYS]{messages[i].content}[/SYS]\n{messages[i+1].content}' + )) i += 2 + elif messages[i].role == 'assistant' and messages[i].tool_calls and messages[i].content: + tc = '\n'.join(f'{json.dumps(tc.model_dump())}' for tc in messages[i].tool_calls) + new_messages.append(Message( + role="assistant", + content=f'{messages[i].content}\n{tc}' + )) + i += 1 else: new_messages.append(messages[i]) i += 1 @@ -72,13 +80,15 @@ class ChatFormat: messages = new_messages # print(f'messages={messages}') - return self.template.render( + result = self.template.render( messages=messages, eos_token=self.eos_token, bos_token='' if omit_bos else self.bos_token, raise_exception=raise_exception, add_generation_prompt=add_generation_prompt, ) + sys.stderr.write(f'\n# RENDERED:\n\n{result}\n\n') + return result # While the API will be usable with a generic tools usage like OpenAI, # (see https://cookbook.openai.com/examples/how_to_call_functions_with_chat_models), @@ -120,38 +130,29 @@ def make_tools_prompt(chat_format: ChatFormat, tools: list[Tool], indent=2) -> M return Message( role="system", content='\n'.join([ - '''You are a function calling AI model. You are provided with function signatures within XML tags.''', + # '''You are a function calling AI model. You are provided with function signatures within XML tags.''', '''You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. 
Here are the available tools:''', '''<tools>''', - *(json.dumps(tool.model_dump(), indent=indent) for tool in tools), + _tools_typescript_signatures(tools), + # _tools_schema_signatures(tools, indent=indent), '''</tools>''', '', - '''Use the following json schema for each tool call you will make: {"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"}''', - '', + # '''Use the following json schema for each tool call you will make: {"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"}''', + # '', # '''For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:''', '''To call each function, give its name and arguments within <tool_call></tool_call> XML tags as follows:''', '''<tool_call>''', - '''{"arguments": <args-dict>, "name": <function-name>}''', + '''{"name": <function-name>, "arguments": <args-dict>}''', '''</tool_call>''', - '''This is not hypothetical, you're not asked what you would do. If you need a tool called, just call it.''', + # '''This is not hypothetical, you're not asked what you would do. If you need a tool called, just call it with <tool_call>...</tool_call>.''', ]) ) elif chat_format.tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2: - ts_converter = SchemaToTypeScriptConverter() - return Message( role="system", - content='\n'.join([ - '// Supported function definitions that should be called when necessary.' 
- 'namespace functions {', - *[ - '// ' + tool.function.description.replace('\n', '\n// ') + '\n' + '' - 'type ' + tool.function.name + ' = (_: ' + ts_converter.visit(tool.function.parameters) + ") => any;\n" - for tool in tools - ], - '} // namespace functions', - ]) + content= '// Supported function definitions that should be called when necessary.\n' + + _tools_typescript_signatures(tools) ) elif chat_format.tool_style == ToolsPromptStyle.TOOLS_HERMES_2_PRO: @@ -170,6 +171,20 @@ def make_tools_prompt(chat_format: ChatFormat, tools: list[Tool], indent=2) -> M else: raise ValueError(f"Unsupported tool call style: {chat_format.tool_style}") +def _tools_typescript_signatures(tools: list[Tool]) -> str: + ts_converter = SchemaToTypeScriptConverter() + return 'namespace functions {' + '\n'.join( + '// ' + tool.function.description.replace('\n', '\n// ') + '\n' + '' + 'type ' + tool.function.name + ' = (_: ' + ts_converter.visit(tool.function.parameters) + ") => any;\n" + for tool in tools + ) + '} // namespace functions' + +def _tools_schema_signatures(tools: list[Tool], indent=None) -> str: + return '\n'.join( + json.dumps(tool.model_dump(), indent=indent) + for tool in tools + ) + @typechecked def _outputs_tool_call_tags(style: ToolsPromptStyle) -> bool: return style in ( @@ -199,6 +214,8 @@ def make_grammar(chat_format: ChatFormat, tools: list[Tool], response_schema: Op assert planted_prompt.startswith(empty_prompt), f"Planted prompt does not start with empty prompt: {planted_prompt} vs {empty_prompt}" [prefix, suffix] = planted_prompt[len(empty_prompt):].split(delimiter) + allow_parallel_calls = False + def strip_suffix(s: str) -> str: if s.endswith(suffix): return s[:-len(suffix)] @@ -235,17 +252,19 @@ tool_call_rule = converter._add_rule( 'tool_call', - format_literal("<tool_call>") + " (" + + format_literal("<tool_call>") + " space (" + ' | '.join(tool_rules) + - ") " + format_literal("</tool_call>")) + ") space " + 
format_literal("</tool_call>"))# + ' space') # Ideally we'd want a negative lookahead of /<tool_call>/, but it's just too hard to express in GBNF for now. # So we just over-constrain the content rule to not contain literals dangerously getting close to <tool_call> - content_rule = converter._add_rule('content', '[^<] | "<" [^t<]? | "<t" [^o<]?')) converter._add_rule( 'root', - f'{content_rule}* ({tool_call_rule}+ {content_rule}*)?') + # tool_call_rule) + f'{content_rule}* ({tool_call_rule}+ {content_rule}*)?' if allow_parallel_calls \ + else f'{content_rule}* {tool_call_rule}?') # # Constrain the output to be a non-tool-call message (constrained to a JSON schema or not) # # OR a tool-call message respecting the schema of any of the tools @@ -285,7 +304,7 @@ id=gen_callid(), function=FunctionCall(**fc))) - content = '(...)'.join(content).strip() + content = '\n'.join(content).strip() return Message(role="assistant", content=content if content else None, tool_calls=tool_calls) # if '<tool_call>'.startswith(ls) or ls.startswith('<tool_call>'): @@ -338,7 +357,8 @@ converter._add_rule( 'root', f'{content_without_start_rule} {content_rule}* ({tool_call_rule}+ {content_rule}*)? | ' - f'{tool_call_without_start_rule} {tool_call_rule}* {content_rule}*') + f'{tool_call_without_start_rule} {tool_call_rule}* {content_rule}*' if allow_parallel_calls \ + else f'{content_without_start_rule} {tool_call_rule}? 
| {tool_call_without_start_rule}') # converter._add_rule( # "root", diff --git a/examples/openai/server.py b/examples/openai/server.py index 21c30623d..ad3910625 100644 --- a/examples/openai/server.py +++ b/examples/openai/server.py @@ -59,8 +59,9 @@ def main( async def chat_completions(request: Request, chat_request: ChatCompletionRequest): headers = { "Content-Type": "application/json", - "Authorization": request.headers.get("Authorization"), } + if (auth := request.headers.get("Authorization")): + headers["Authorization"] = auth if chat_request.response_format is not None: assert chat_request.response_format.type == "json_object", f"Unsupported response format: {chat_request.response_format.type}" @@ -75,18 +76,31 @@ def main( (grammar, parser) = make_grammar(chat_format, chat_request.tools, response_schema) # TODO: Test whether the template supports formatting tool_calls - sys.stderr.write(f'\n{grammar}\n\n') prompt = chat_format.render(messages, add_generation_prompt=True) + + sys.stderr.write(f'\n# PROMPT:\n\n{prompt}\n\n') + sys.stderr.write(f'\n# GRAMMAR:\n\n{grammar}\n\n') + + data = LlamaCppServerCompletionRequest( + **{ + k: v + for k, v in chat_request.model_dump().items() + if k not in ( + "prompt", + "tools", + "messages", + "response_format", + ) + }, + prompt=prompt, + grammar=grammar, + ).model_dump() + sys.stderr.write(json.dumps(data, indent=2) + "\n") async with httpx.AsyncClient() as client: response = await client.post( f"{cpp_server_endpoint}/completions", - json=LlamaCppServerCompletionRequest( - prompt=prompt, - stream=chat_request.stream, - n_predict=1000, - grammar=grammar, - ).model_dump(), + json=data, headers=headers, timeout=None) @@ -96,11 +110,11 @@ def main( return StreamingResponse(generate_chunks(response), media_type="text/event-stream") else: result = response.json() + sys.stderr.write("# RESULT:\n\n" + json.dumps(result, indent=2) + "\n\n") if 'content' not in result: # print(json.dumps(result, indent=2)) return 
JSONResponse(result) - sys.stderr.write(json.dumps(result, indent=2) + "\n") # print(json.dumps(result.get('content'), indent=2)) message = parser(result["content"]) assert message is not None, f"Failed to parse response:\n{response.text}\n\n" diff --git a/examples/openai/ts_converter.py b/examples/openai/ts_converter.py index c0d99d0a4..e29e83507 100644 --- a/examples/openai/ts_converter.py +++ b/examples/openai/ts_converter.py @@ -14,12 +14,21 @@ class SchemaToTypeScriptConverter: # // where to get weather. # location: string, # }) => any; + def _desc_comment(self, schema: dict): + desc = schema.get("description", "").replace("\n", "\n// ") if 'description' in schema else None + return f'// {desc}\n' if desc else '' + def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], additional_properties: Union[bool, Any]): + if additional_properties == True: + additional_properties = {} + elif additional_properties == False: + additional_properties = None + return "{" + ', '.join([ - f'{prop_name}{"" if prop_name in required else "?"}: {self.visit(prop_schema)}' + f'{self._desc_comment(prop_schema)}{prop_name}{"" if prop_name in required else "?"}: {self.visit(prop_schema)}' for prop_name, prop_schema in properties ] + ( - [f"[key: string]: {self.visit(additional_properties)}"] + [f"{self._desc_comment(additional_properties) if additional_properties else ''}[key: string]: {self.visit(additional_properties)}"] if additional_properties is not None else [] )) + "}"