From af0a9faf7f8bb2e3a29e5865b518fd3a8db8e17f Mon Sep 17 00:00:00 2001
From: Don Mahurin <2797413+dmahurin@users.noreply.github.com>
Date: Sat, 21 Sep 2024 23:23:17 -0700
Subject: [PATCH 1/3] Add basic function calling example using a llama-cli python wrapper

---
 examples/function-calling/README.md | 46 +++++++
 examples/function-calling/function_tool.py | 63 +++++++++
 examples/function-calling/functions.py | 30 +++++
 .../llama-cli-function-runner.py | 122 ++++++++++++++++++
 4 files changed, 261 insertions(+)
 create mode 100644 examples/function-calling/README.md
 create mode 100644 examples/function-calling/function_tool.py
 create mode 100644 examples/function-calling/functions.py
 create mode 100755 examples/function-calling/llama-cli-function-runner.py

diff --git a/examples/function-calling/README.md b/examples/function-calling/README.md
new file mode 100644
index 000000000..f431c9835
--- /dev/null
+++ b/examples/function-calling/README.md
@@ -0,0 +1,46 @@
+# llama.cpp/examples/function-calling
+
+This example shows how to do basic function calling using llama-cli and a Python wrapper to declare and call functions.
+
+## Options
+
+Important options for llama-cli-function-runner.py:
+
+- `-m FNAME, --model FNAME`: Specify the path to the function calling model (e.g., `-m "$(huggingface-cli download meetkai/functionary-small-v3.2-GGUF functionary-small-v3.2.Q4_0.gguf)"`).
+- `--ctx-size N`: Set the size of the prompt context. The default is 1024.
+- `--special`: Show special tokens and function calling details.
+
+## Example showing function call details
+
+```
+./examples/function-calling/llama-cli-function-runner.py -m `huggingface-cli download meetkai/functionary-small-v3.2-GGUF functionary-small-v3.2.Q4_0.gguf` -i --special
+What is the weather in Phoenix?
+Sure, I'll look that up for you. Let me just check the current weather conditions in Phoenix.>>>get_weather
+{"location": "Phoenix"}<|eot_id|>
+{"temperature": "30C"}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+The current weather in Phoenix is 30C.<|eot_id|>
+What is 38484 + 323?
+Sure, let's calculate that.>>>calculate
+{"expression": "38484 + 323"}<|eot_id|>
+{"result": 38807}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+The sum of 38484 and 323 is 38807.<|eot_id|>
+What is 67 feet in meters?
+To convert 67 feet into meters, we use the conversion factor: 1 foot is approximately 0.3048 meters. Let's calculate it.>>>calculate
+{"expression": "67 * 0.3048"}<|eot_id|>
+{"result": 20.4216}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+67 feet is approximately 20.4216 meters.<|eot_id|>
+```
+
+## Function calling example, hiding details
+```
+./examples/function-calling/llama-cli-function-runner.py -m `huggingface-cli download meetkai/functionary-small-v3.2-GGUF functionary-small-v3.2.Q4_0.gguf` -i
+What is the weather in Phoenix?
+To provide you with the current weather in Phoenix, Arizona, I will need to check the weather data for you. Let me get that information.
+The current weather in Phoenix, Arizona is 30°C. If you have any more questions about weather in other locations, feel free to ask!
+Is it colder in Vegas?
+To determine if the current temperature in Las Vegas is colder than in Phoenix, which is currently 30°C, I will need to check the weather data for Las Vegas. Let's find out.
+The current weather in Las Vegas, Nevada is also 30°C. Therefore, there is no difference in temperature between Phoenix and Las Vegas at the moment. 
If you have any more questions or need further assistance, please let me know!
+What is 37234 times 39?
+To calculate 37234 times 39, I'll perform the multiplication. Let's do that.
+The result of multiplying 37234 by 39 is 1,452,126. If you have any more calculations or questions, feel free to ask!
+```
diff --git a/examples/function-calling/function_tool.py b/examples/function-calling/function_tool.py
new file mode 100644
index 000000000..da1598e29
--- /dev/null
+++ b/examples/function-calling/function_tool.py
@@ -0,0 +1,63 @@
+# Generate function calling definitions and function schemas
+
+import inspect
+import re
+
+# Extract OpenAI function calling style definitions from functions
+#
+# Generated with: Create a python function to to generate the OpenAI function calling definition from a given function, getting the description, parameter type and parameter description from the function documentation, assuming the function documentation contains sphynx style parameter descriptions, marked with :param.
+def get_function_tool_json(func):
+    typemap = { 'str': 'string' }
+    def get_type(s):
+        return typemap[s] if s in typemap else s
+
+    function_name = func.__name__
+    doc_parts = re.split(r'\n\s*:param[^:]*\s+', func.__doc__.rstrip())
+
+    function_description = doc_parts[0]
+    params_doc = [ re.split(r'\:\s*', param_doc, maxsplit=1) for param_doc in doc_parts[1:] ]
+    params_doc = { param: desc for param, desc in params_doc }
+
+    function_def = {
+        'name': function_name,
+        'description': function_description,
+        'parameters': { 'type': 'object', 'properties': {}, 'required': [] }
+    }
+
+    for param_name, param in inspect.signature(func).parameters.items():
+        function_def['parameters']['properties'][param_name] = {
+            'type' : get_type(param.annotation.__name__) if param.annotation is not param.empty else '',
+            'description': params_doc[param_name] if param_name in params_doc else ''
+        }
+        function_def['parameters']['required'].append(param_name)
+
+    return function_def
+
+# Generate function definition schema from function definitions
+#
+# This is from llama-cpp-python, llama_chat_format.py
+def generate_schema_from_functions(functions, namespace="functions") -> str:
+    schema = (
+        "// Supported function definitions that should be called when necessary.\n"
+    )
+    schema += f"namespace {namespace} {{\n\n"
+
+    for function in functions:
+        function_name = function["name"]
+        description = function.get("description", "")
+        parameters = function.get("parameters", {})
+        required_params = parameters.get("required", [])
+
+        schema += f"// {description}\n"
+        schema += f"type {function_name} = (_: {{\n"
+
+        for param_name, param in parameters.get("properties", {}).items():
+            param_description = param.get("description", "")
+            param_type = param.get("type", "any")
+            optional_indicator = "" if param_name in required_params else "?" 
+ schema += f"// {param_description}\n" + schema += f"{param_name}{optional_indicator}: {param_type},\n" + schema += "}) => any;\n\n" + + schema += "}} // namespace {}".format(namespace) + return schema diff --git a/examples/function-calling/functions.py b/examples/function-calling/functions.py new file mode 100644 index 000000000..58dc57ae0 --- /dev/null +++ b/examples/function-calling/functions.py @@ -0,0 +1,30 @@ +def calculate(expression: str): + """Evaluate a mathematical expression + :param expression: The mathematical expression to evaluate + """ + try: + result = eval(expression) + return {"result": result} + except: + return {"error": "Invalid expression"} + +def get_weather(location: str): + """get the weather of a location + :param location: where to get weather. + """ + return {"temperature": "30C"} + +def _run_python(code): + allowed_globals = { '__builtins__': None, '_': None } + allowed_locals = {} + + code = code.splitlines() + code[-1] = f"_ = {code[-1]}" + code = '\n'.join(code) + + try: + exec(code, allowed_globals, allowed_locals) + except Exception as e: + return None + + return {'result': allowed_locals.get('_', None)} diff --git a/examples/function-calling/llama-cli-function-runner.py b/examples/function-calling/llama-cli-function-runner.py new file mode 100755 index 000000000..cc452a9ed --- /dev/null +++ b/examples/function-calling/llama-cli-function-runner.py @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +# function calling using llama-cli + +import subprocess +import sys +import select +import os +import re + +import json + +import functions +from function_tool import get_function_tool_json, generate_schema_from_functions + +function_name_list = [ name for name in dir(functions) if not name.startswith('_') ] +function_lookup = { name: getattr(functions, name) for name in function_name_list } +tools = [ get_function_tool_json(f) for (n, f) in function_lookup.items() ] +function_schema = generate_schema_from_functions(tools) + +prompt = """<|start_header_id|>system<|end_header_id|> + +You are capable of executing available function(s) if required. +Execute function(s) as needed. +The function calls are not shown in the conversation and should be called covertly to answer questions. +Ask for the required input to:recipient==all +Use JSON for function arguments. +Respond in this format: +>>>${recipient} +${content} +Available functions: +""" + function_schema + """<|eot_id|><|start_header_id|>system<|end_header_id|> + +When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 60.0 seconds. 
The drive at '/mnt/data' can be used to save and persist user files.<|eot_id|><|start_header_id|>user<|end_header_id|> +""" + +def main(): + import argparse + + parser = argparse.ArgumentParser(epilog='For more options: llama-cli --help') + parser.add_argument('--display-prompt', action=argparse.BooleanOptionalAction, default=False) + parser.add_argument('--special', action=argparse.BooleanOptionalAction, default=False) + parser.add_argument('--reverse-prompt', type=str, default='<|start_header_id|>user<|end_header_id|>\n') + parser.add_argument('--ctx-size', type=int, default=1024) + args, other_args = parser.parse_known_args() + + if args.display_prompt: print(prompt) + + command = [ './llama-cli', '-i', '-p', prompt, '--reverse-prompt', args.reverse_prompt, '--escape', '--special', '--no-display-prompt', '--log-disable', '--simple-io', '--ctx-size', str(args.ctx_size), *other_args] + + process = subprocess.Popen( + command, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if process.stdout is not None: os.set_blocking(process.stdout.fileno(), False) + + try: + run_loop(process, args) + except KeyboardInterrupt: + print("\nInterrupted by user.") + finally: + process.terminate() + process.wait() + +def run_loop(process, args): + pbuffer = '' + skip_output_until_result = False + while True: + readable, _, _ = select.select([process.stdout, process.stderr, sys.stdin], [], []) + + for stream in readable: + if stream == process.stdout: + pdata = process.stdout.read() + if not pdata: continue + pbuffer += pdata + + if(match := re.search(r'>>>([^\n]*)\n(.*)<\|eot_id\|>', pbuffer, re.S)): + if not args.special: + pdata = pdata[:match.pos] + pbuffer = '' + skip_output_until_result = False + + tool_name = match.group(1) + tool_args = match.group(2) + + if tool_name == 'python': + result = functions._run_python(tool_args); + else: + try: + tool_args = json.loads(tool_args) + result = function_lookup[tool_name](**tool_args) + except ValueError as e: + result = {'error': 'unknown'} + + result = json.dumps(result) + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + process.stdin.write(result + '\n') + process.stdin.flush() + if(args.special): pdata += '\n' + result + elif (n := pdata.find('>>>')) >= 0: + if not args.special: + pdata = pdata[:n] + skip_output_until_result = True + elif skip_output_until_result: + pdata = '' + + if not args.special: + pdata = re.sub(r'<\|[^\|>]*\|>', '', pdata) + sys.stdout.write(pdata) + sys.stdout.flush() + + elif stream == sys.stdin: + user_input = sys.stdin.readline() + if user_input: + user_input = user_input.rstrip() + process.stdin.write(user_input + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + '\n') + process.stdin.flush() + +if __name__ == '__main__': + main() + From 8550b76f4e69f9b0e2d1497746adfe4198a83d2e Mon Sep 17 00:00:00 2001 From: Don Mahurin <2797413+dmahurin@users.noreply.github.com> Date: Sat, 28 Sep 2024 14:10:55 -0700 Subject: [PATCH 2/3] Move function format specification to function_tool.py --- examples/function-calling/function_tool.py | 32 +++++++++- .../llama-cli-function-runner.py | 64 ++++++++----------- 2 files changed, 58 insertions(+), 38 deletions(-) diff --git a/examples/function-calling/function_tool.py b/examples/function-calling/function_tool.py index da1598e29..850a1f715 100644 --- a/examples/function-calling/function_tool.py +++ b/examples/function-calling/function_tool.py @@ -3,6 +3,8 @@ import inspect import re +import json + # Extract OpenAI function calling 
style definitions from functions # # Generated with: Create a python function to to generate the OpenAI function calling definition from a given function, getting the description, parameter type and parameter description from the function documentation, assuming the function documentation contains sphynx style parameter descriptions, marked with :param. @@ -36,7 +38,7 @@ def get_function_tool_json(func): # Generate function definition schema from function definitions # # This is from llama-cpp-python, llama_chat_format.py -def generate_schema_from_functions(functions, namespace="functions") -> str: +def generate_functionary_schema_from_functions(functions, namespace="functions") -> str: schema = ( "// Supported function definitions that should be called when necessary.\n" ) @@ -61,3 +63,31 @@ def generate_schema_from_functions(functions, namespace="functions") -> str: schema += "}} // namespace {}".format(namespace) return schema + +functionary_prompt_start = """<|start_header_id|>system<|end_header_id|> + +You are capable of executing available function(s) if required. +Execute function(s) as needed. +The function calls are not shown in the conversation and should be called covertly to answer questions. +Ask for the required input to:recipient==all +Use JSON for function arguments. +Respond in this format: +>>>${recipient} +${content} +Available functions: +""" +functionary_prompt_end = """<|eot_id|><|start_header_id|>system<|end_header_id|> + +When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 60.0 seconds. The drive at '/mnt/data' can be used to save and persist user files.<|eot_id|><|start_header_id|>user<|end_header_id|> +""" + +def get_chat_tool_format(args, tools): + return { + 'prompt': functionary_prompt_start + generate_functionary_schema_from_functions(tools) + functionary_prompt_end, + 'function_marker': '>>>', + 'function_re': r'>>>([^\n]*)\n(.*)<\|eot_id\|>', + 'user_start': '<|start_header_id|>user<|end_header_id|>\n', + 'user_end': '<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + '\n', + 'tool_start': '', + 'tool_end': '<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + } diff --git a/examples/function-calling/llama-cli-function-runner.py b/examples/function-calling/llama-cli-function-runner.py index cc452a9ed..473139bdb 100755 --- a/examples/function-calling/llama-cli-function-runner.py +++ b/examples/function-calling/llama-cli-function-runner.py @@ -10,28 +10,11 @@ import re import json import functions -from function_tool import get_function_tool_json, generate_schema_from_functions +from function_tool import get_function_tool_json, get_chat_tool_format function_name_list = [ name for name in dir(functions) if not name.startswith('_') ] function_lookup = { name: getattr(functions, name) for name in function_name_list } tools = [ get_function_tool_json(f) for (n, f) in function_lookup.items() ] -function_schema = generate_schema_from_functions(tools) - -prompt = """<|start_header_id|>system<|end_header_id|> - -You are capable of executing available function(s) if required. -Execute function(s) as needed. -The function calls are not shown in the conversation and should be called covertly to answer questions. -Ask for the required input to:recipient==all -Use JSON for function arguments. 
-Respond in this format: ->>>${recipient} -${content} -Available functions: -""" + function_schema + """<|eot_id|><|start_header_id|>system<|end_header_id|> - -When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 60.0 seconds. The drive at '/mnt/data' can be used to save and persist user files.<|eot_id|><|start_header_id|>user<|end_header_id|> -""" def main(): import argparse @@ -39,13 +22,17 @@ def main(): parser = argparse.ArgumentParser(epilog='For more options: llama-cli --help') parser.add_argument('--display-prompt', action=argparse.BooleanOptionalAction, default=False) parser.add_argument('--special', action=argparse.BooleanOptionalAction, default=False) - parser.add_argument('--reverse-prompt', type=str, default='<|start_header_id|>user<|end_header_id|>\n') + parser.add_argument('--reverse-prompt', type=str) parser.add_argument('--ctx-size', type=int, default=1024) args, other_args = parser.parse_known_args() - if args.display_prompt: print(prompt) + tool_format = get_chat_tool_format(args, tools) + if args.reverse_prompt is None: args.reverse_prompt = tool_format['user_start'] - command = [ './llama-cli', '-i', '-p', prompt, '--reverse-prompt', args.reverse_prompt, '--escape', '--special', '--no-display-prompt', '--log-disable', '--simple-io', '--ctx-size', str(args.ctx_size), *other_args] + if args.display_prompt: print(tool_format['prompt']) + + command = [ './llama-cli', '-i', '-p', tool_format['prompt'], '--reverse-prompt', args.reverse_prompt, '--escape', '--special', '--no-display-prompt', '--log-disable', '--simple-io', '--ctx-size', str(args.ctx_size), *other_args] + print("'" + "' '".join(command) + "'") process = subprocess.Popen( command, @@ -57,14 +44,14 @@ def main(): if process.stdout is not None: os.set_blocking(process.stdout.fileno(), False) try: - run_loop(process, args) + run_loop(process, args, tool_format) except KeyboardInterrupt: print("\nInterrupted by user.") finally: process.terminate() process.wait() -def run_loop(process, args): +def run_loop(process, args, tool_format): pbuffer = '' skip_output_until_result = False while True: @@ -76,29 +63,32 @@ def run_loop(process, args): if not pdata: continue pbuffer += pdata - if(match := re.search(r'>>>([^\n]*)\n(.*)<\|eot_id\|>', pbuffer, re.S)): + if(match := re.search(tool_format['function_re'], pbuffer, re.S)): if not args.special: pdata = pdata[:match.pos] pbuffer = '' skip_output_until_result = False + try: + if 1 < len(match.groups()): + tool_name = match.group(1) + tool_args = json.loads(match.group(2)) + else: + tool = json.loads(match.group(1)) + tool_name = tool['name'] + tool_args = tool['arguments'] - tool_name = match.group(1) - tool_args = match.group(2) - - if tool_name == 'python': - result = functions._run_python(tool_args); - else: - try: - tool_args = json.loads(tool_args) + if tool_name == 'python': + result = functions._run_python(tool_args); + else: result = function_lookup[tool_name](**tool_args) - except ValueError as e: - result = {'error': 'unknown'} + except ValueError as e: + result = {'error': 'unknown'} - result = json.dumps(result) + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + result = tool_format['tool_start'] + json.dumps(result) + tool_format['tool_end'] process.stdin.write(result + '\n') process.stdin.flush() if(args.special): pdata += '\n' + result - elif (n := pdata.find('>>>')) >= 0: + elif (n := 
pdata.find(tool_format['function_marker'])) >= 0:
                 if not args.special:
                     pdata = pdata[:n]
                 skip_output_until_result = True
@@ -114,7 +104,7 @@ def run_loop(process, args):
             user_input = sys.stdin.readline()
             if user_input:
                 user_input = user_input.rstrip()
-                process.stdin.write(user_input + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + '\n')
+                process.stdin.write(user_input + tool_format['user_end'] + '\n')
                 process.stdin.flush()
 
 if __name__ == '__main__':
     main()

From b790a7ff290f64f865e0c66602a98ba70a901578 Mon Sep 17 00:00:00 2001
From: Don Mahurin <2797413+dmahurin@users.noreply.github.com>
Date: Sat, 28 Sep 2024 14:10:55 -0700
Subject: [PATCH 3/3] Allow simpler function calling syntax, as used with the Phi-3 function calling model

---
 examples/function-calling/README.md | 5 +++
 examples/function-calling/function_tool.py | 35 ++++++++++++++-----
 .../llama-cli-function-runner.py | 4 +--
 3 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/examples/function-calling/README.md b/examples/function-calling/README.md
index f431c9835..e245747b1 100644
--- a/examples/function-calling/README.md
+++ b/examples/function-calling/README.md
@@ -44,3 +44,8 @@ What is 37234 times 39?
 To calculate 37234 times 39, I'll perform the multiplication. Let's do that.
 The result of multiplying 37234 by 39 is 1,452,126. If you have any more calculations or questions, feel free to ask!
 ```
+
+## Function calling example, using Phi-3 function calling
+```
+./examples/function-calling/llama-cli-function-runner.py -m `huggingface-cli download nold/Phi-3-mini-4k-instruct-function-calling-GGUF Phi-3-mini-4k-instruct-function-calling_Q4_K_M.gguf` --special --display-prompt -i
+```
diff --git a/examples/function-calling/function_tool.py b/examples/function-calling/function_tool.py
index 850a1f715..f92d81ec1 100644
--- a/examples/function-calling/function_tool.py
+++ b/examples/function-calling/function_tool.py
@@ -64,6 +64,9 @@ def generate_functionary_schema_from_functions(functions, namespace="functions")
     schema += "}} // namespace {}".format(namespace)
     return schema
 
+def generate_simple_schema_from_functions(functions) -> str:
+    return '\n'.join([json.dumps(function).replace('{', '{ ').replace('}', ' }') for function in functions])
+
 functionary_prompt_start = """<|start_header_id|>system<|end_header_id|>
 
 You are capable of executing available function(s) if required.
 Execute function(s) as needed.
 The function calls are not shown in the conversation and should be called covertly to answer questions.
 Ask for the required input to:recipient==all
 Use JSON for function arguments.
 Respond in this format:
 >>>${recipient}
 ${content}
 Available functions:
 """
@@ -81,13 +84,27 @@ functionary_prompt_end = """<|eot_id|><|start_header_id|>system<|end_header_id|>
 
 When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 60.0 seconds. The drive at '/mnt/data' can be used to save and persist user files.<|eot_id|><|start_header_id|>user<|end_header_id|>
 """
+simple_prompt_start = """<|user|> You are a helpful assistant with access to the following functions. 
Use them if required - """ +simple_prompt_end = """<|end|>""" + def get_chat_tool_format(args, tools): - return { - 'prompt': functionary_prompt_start + generate_functionary_schema_from_functions(tools) + functionary_prompt_end, - 'function_marker': '>>>', - 'function_re': r'>>>([^\n]*)\n(.*)<\|eot_id\|>', - 'user_start': '<|start_header_id|>user<|end_header_id|>\n', - 'user_end': '<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + '\n', - 'tool_start': '', - 'tool_end': '<|eot_id|><|start_header_id|>assistant<|end_header_id|>' - } + if 'functionary' in args.model.lower(): + return { + 'prompt': functionary_prompt_start + generate_functionary_schema_from_functions(tools) + functionary_prompt_end, + 'function_marker': '>>>', + 'function_re': r'>>>([^\n]*)\n(.*)<\|eot_id\|>', + 'user_start': '<|start_header_id|>user<|end_header_id|>\n', + 'user_end': '<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + '\n', + 'tool_start': '', + 'tool_end': '<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + } + else: + return { + 'prompt': simple_prompt_start + generate_simple_schema_from_functions(tools) + simple_prompt_end, + 'function_marker': '', + 'function_re': r' \n?(.*)<\|end\|>', + 'user_start': '<|user|> ', + 'user_end': '<|end|>' + '\n', + 'tool_start': '<|user|>', + 'tool_end': '<|end|> <|assistant|>' + } diff --git a/examples/function-calling/llama-cli-function-runner.py b/examples/function-calling/llama-cli-function-runner.py index 473139bdb..066363b69 100755 --- a/examples/function-calling/llama-cli-function-runner.py +++ b/examples/function-calling/llama-cli-function-runner.py @@ -23,6 +23,7 @@ def main(): parser.add_argument('--display-prompt', action=argparse.BooleanOptionalAction, default=False) parser.add_argument('--special', action=argparse.BooleanOptionalAction, default=False) parser.add_argument('--reverse-prompt', type=str) + parser.add_argument('-m', '--model', type=str, default='model.gguf') parser.add_argument('--ctx-size', type=int, default=1024) args, other_args = parser.parse_known_args() @@ -31,8 +32,7 @@ def main(): if args.display_prompt: print(tool_format['prompt']) - command = [ './llama-cli', '-i', '-p', tool_format['prompt'], '--reverse-prompt', args.reverse_prompt, '--escape', '--special', '--no-display-prompt', '--log-disable', '--simple-io', '--ctx-size', str(args.ctx_size), *other_args] - print("'" + "' '".join(command) + "'") + command = [ './llama-cli', '-i', '-p', tool_format['prompt'], '--model', args.model, '--reverse-prompt', args.reverse_prompt, '--escape', '--special', '--no-display-prompt', '--log-disable', '--simple-io', '--ctx-size', str(args.ctx_size), *other_args] process = subprocess.Popen( command,