diff --git a/examples/openai/prompting.py b/examples/openai/prompting.py
index ea6572d7b..d8ecd5174 100644
--- a/examples/openai/prompting.py
+++ b/examples/openai/prompting.py
@@ -128,10 +128,12 @@ def make_tools_prompt(chat_format: ChatFormat, tools: list[Tool], indent=2) -> M
             '',
             '''Use the following json schema for each tool call you will make: {"properties": {"arguments": {"title": "Arguments", "type": "object"}, "name": {"title": "Name", "type": "string"}}, "required": ["arguments", "name"], "title": "FunctionCall", "type": "object"}''',
             '',
-            '''For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:''',
+            # '''For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:''',
+            '''To call each function, give its name and arguments within <tool_call></tool_call> XML tags as follows:''',
             '''<tool_call>''',
             '''{"arguments": <args-dict>, "name": <function-name>}''',
             '''</tool_call>''',
+            '''This is not hypothetical, you're not asked what you would do. If you need a tool called, just call it.''',
         ])
     )
@@ -201,17 +203,21 @@ def make_grammar(chat_format: ChatFormat, tools: list[Tool], response_schema: Op
         if s.endswith(suffix):
             return s[:-len(suffix)]
         else:
-            print(f"Expected suffix ({suffix}) not found: {s}")
+            sys.stderr.write(f"Expected suffix ({suffix}) not found: {s}\n")
             return s
 
     if tools:
         if _outputs_tool_call_tags(chat_format.tool_style):
+
+            escapes_underscores = chat_format.tool_style != ToolsPromptStyle.TOOLS_HERMES_2_PRO
+
             tool_rules = [
                 converter.visit(
                     dict(
                         type="object",
                         properties=dict(
-                            name=dict(const=tool.function.name),
+                            name=dict(type="string", pattern='^' + tool.function.name.replace('_', f'\\?_') + '$') if escapes_underscores \
+                                else dict(const=tool.function.name),
                             arguments=tool.function.parameters,
                         ),
                         required=['name', 'arguments']
@@ -221,22 +227,45 @@ def make_grammar(chat_format: ChatFormat, tools: list[Tool], response_schema: Op
                 for tool in tools
             ]
 
-            # Constrain the output to be a non-tool-call message (constrained to a JSON schema or not)
-            # OR a tool-call message respecting the schema of any of the tools
+            def format_literal(s: str) -> str:
+                if escapes_underscores:
+                    return ' "\\\\"? "_" '.join((converter._format_literal(part) for part in s.split('_')))
+                else:
+                    return converter._format_literal(s)
+
+            tool_call_rule = converter._add_rule(
+                'tool_call',
+                format_literal("<tool_call>") + " (" +
+                ' | '.join(tool_rules) +
+                ") " + format_literal("</tool_call>"))
+
+            # Ideally we'd want a negative lookahead of /<tool\?_call>/, but it's just too hard to express in GBNF for now.
+            # So we just over-constrain the content rule to not contain literals dangerously getting close to <tool_call>
+            content_rule = converter._add_rule('content', '[^<] | "<" [^t<]? | "<t" [^o<]?')
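+            # Illustrative only, assuming a single hypothetical tool named "get_weather":
+            # the two rules above expand to GBNF along the lines of
+            #   tool-call ::= "<tool_call>" ( get-weather-call ) "</tool_call>"
+            #   content   ::= [^<] | "<" [^t<]? | "<t" [^o<]?
+            # i.e. free-form content can never spell out a full "<tool_call>" opening tag.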
             converter._add_rule(
-                "root",
-                converter._format_literal(prefix) + " (" +
-                    (response_rule or converter.not_literal("<tool_call>")) + " | " +
-                    converter._format_literal("<tool_call>") + " (" +
-                    ' | '.join(tool_rules) +
-                    ") " + converter._format_literal("</tool_call>") +
-                ")") # + converter._format_literal(suffix))
+                'root',
+                f'{content_rule}* ({tool_call_rule}+ {content_rule}*)?')
+
+            # # Constrain the output to be a non-tool-call message (constrained to a JSON schema or not)
+            # # OR a tool-call message respecting the schema of any of the tools
+            # converter._add_rule(
+            #     "root",
+            #     converter._format_literal(prefix) + " (" +
+            #         (response_rule or converter.not_literal("<tool_call>")) + " | " +
+            #         converter._format_literal("<tool_call>") + " (" +
+            #         ' | '.join(tool_rules) +
+            #         ") " + converter._format_literal("</tool_call>") +
+            #     ")") # + converter._format_literal(suffix))
 
             @typechecked
             def parse(s: str) -> Optional[Message]:
                 s = strip_suffix(s)
 
-                # ls = s.lstrip()
+                if r'<tool\_call>' in s:
+                    # Some weird escaping of underscores is happening w/ Mixtral 8x7B Instruct
+                    s = s.replace(r'\_', '_')
+
                 parts = _tool_call_re.split(s)
                 if len(parts) == 1:
                     return Message(role="assistant", content=s)
@@ -247,13 +276,17 @@ def make_grammar(chat_format: ChatFormat, tools: list[Tool], response_schema: Op
                     if i % 2 == 0:
                         content.append(part)
                     else:
+                        try:
+                            fc = json.loads(part)
+                        except json.JSONDecodeError:
+                            raise ValueError(f'Failed to parse tool call as JSON: {part}\nFull string: {s}')
                         tool_calls.append(
                             ToolCall(
                                 id=gen_callid(),
-                                function=FunctionCall(**json.loads(part))))
+                                function=FunctionCall(**fc)))
 
-                content = ''.join(content).strip()
-                return Message(role="assistant", content=None if content == '' else content, tool_calls=tool_calls)
+                content = '(...)'.join(content).strip()
+                return Message(role="assistant", content=content if content else None, tool_calls=tool_calls)
 
                 # if '<tool_call>'.startswith(ls) or ls.startswith('<tool_call>'):
                 #     if ls.startswith('<tool_call>') and ls.endswith('</tool_call>' + suffix):
 
@@ -268,17 +301,54 @@ def make_grammar(chat_format: ChatFormat, tools: list[Tool], response_schema: Op
         elif chat_format.tool_style == ToolsPromptStyle.TYPESCRIPT_FUNCTIONARY_V2:
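+            # For reference (illustrative, not tied to any specific model run): Functionary v2
+            # formats each turn as '<|from|>assistant\n<|recipient|>RECIPIENT\n<|content|>...',
+            # where RECIPIENT is 'all' for plain text or the name of the tool being called, e.g.
+            #   <|from|>assistant\n<|recipient|>get_weather\n<|content|>\n{"location": "Glasgow"}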
             # Only allowing a single tool call at a time for now.
             # Note that if there were more, they'd be separated by a '<|from|>assistant' literal
+
+            tool_rules = [
+                converter._add_rule(
+                    tool.function.name + '-call',
+                    converter._format_literal(tool.function.name) + ' ' + converter._format_literal('\n<|content|>\n') + ' ' +
+                    converter.visit(tool.function.parameters, tool.function.name + '-args') + ' ' +
+                    converter._format_literal('\n'))
+                # converter.visit(
+                #     dict(
+                #         type="object",
+                #         properties=dict(
+                #             name=dict(const=tool.function.name),
+                #             arguments=tool.function.parameters,
+                #         ),
+                #         required=['name', 'arguments']
+                #     ),
+                #     f'{tool.function.name}-tool-call'
+                # )
+                for i, tool in enumerate(tools)
+            ]
+
+            not_from_rule = converter._add_rule('not_from', converter.not_literal("<|from|>"))
+            content_without_start_rule = converter._add_rule('content_without_start', converter._format_literal("all\n<|content|>") + ' ' + not_from_rule + '*')
+            start_rule = converter._add_rule('start', converter._format_literal('<|from|>assistant\n<|recipient|>'))
+            content_rule = converter._add_rule('content', start_rule + ' ' + content_without_start_rule)
+            tool_call_without_start_rule = converter._add_rule(
+                'tool_call_without_start',
+                ' | '.join(tool_rules))
+                # + ' ' +
+                # converter.not_literal("all", dotall=False) + ' ' + converter._format_literal('\n<|content|>\n') + ' ' + not_from_rule + '*')
+            tool_call_rule = converter._add_rule('tool_call', f'{start_rule} {tool_call_without_start_rule}')
+            # converter._add_rule('root', f'({content_without_start_rule} ({content_rule})* ({tool_call_rule}+ {content_rule}*)? | {tool_call_without_start_rule} (* {tool_call_rule}{content_rule}*')
             converter._add_rule(
-                "root",
-                converter._format_literal(prefix) + " (" +
-                    (response_rule or converter.not_literal("<|recipient|>")) + " | " +
-                    (' | '.join(
-                        converter._format_literal(f"<|recipient|>{tool.function.name}\n<|content|>") + " " +
-                        converter.visit(tool.function.parameters, tool.function.name + '-args')
-                        for tool in tools
-                    )) +
-                    ") " +
-                ")") # + converter._format_literal(suffix))
+                'root',
+                f'{content_without_start_rule} {content_rule}* ({tool_call_rule}+ {content_rule}*)? | '
+                f'{tool_call_without_start_rule} {tool_call_rule}* {content_rule}*')
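+            # In words (a sketch of the rule's intent, not an additional rule): a reply either
+            # opens with plain 'all' content and may then switch to tool calls, or opens with a
+            # tool call directly; every later segment re-announces '<|from|>assistant\n<|recipient|>'.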
+
+            # converter._add_rule(
+            #     "root",
+            #     converter._format_literal(prefix) + " (" +
+            #         (response_rule or converter.not_literal("<|recipient|>")) + " | " +
+            #         (' | '.join(
+            #             converter._format_literal(f"<|recipient|>{tool.function.name}\n<|content|>") + " " +
+            #             converter.visit(tool.function.parameters, tool.function.name + '-args')
+            #             for tool in tools
+            #         )) +
+            #         ") " +
+            #     ")") # + converter._format_literal(suffix))
 
             @typechecked
             def parse(s: str) -> Optional[Message]:
@@ -297,17 +367,25 @@ def make_grammar(chat_format: ChatFormat, tools: list[Tool], response_schema: Op
                     if recipient == 'all':
                         text_content.append(content)
                     else:
+                        try:
+                            arguments = json.loads(content)
+                        except json.JSONDecodeError:
+                            raise ValueError(f'Failed to parse tool call content as JSON: {content}')
                         tool_calls.append(
                             ToolCall(
                                 id=gen_callid(),
-                                function=FunctionCall(name=recipient, arguments=json.loads(content))))
+                                function=FunctionCall(name=recipient, arguments=arguments)))
+
-                assert parts[-1].strip() == '', f'Unexpected content after tool calls: {parts[-1]}'
+                assert parts[-1].strip() in ('', '<|stop|>'), f'Unexpected content after tool calls: {parts[-1]}\nFull string: {s}'
 
                 content = '\n'.join(text_content).strip()
-                return Message(role="assistant", content=None if content == '' else content, tool_calls=tool_calls if tool_calls else None)
+                return Message(role="assistant", content=content if content else None, tool_calls=tool_calls if tool_calls else None)
 
             return (converter.format_grammar(), parse)
+
+        else:
+            raise ValueError(f"Unsupported tool call style: {chat_format.tool_style}")
 
     elif response_schema:
         converter._add_rule("root", response_rule + ' ' + converter._format_literal(suffix))
diff --git a/examples/openai/server.py b/examples/openai/server.py
index 8635da9e5..349ad6c7d 100644
--- a/examples/openai/server.py
+++ b/examples/openai/server.py
@@ -30,27 +30,28 @@ def main(
     # model_url: Annotated[Optional[str], typer.Option("--model-url", "-mu")] = None,
     host: str = "localhost",
     port: int = 8080,
-    main_server_endpoint: Optional[str] = None,
-    main_server_host: str = "localhost",
-    main_server_port: Optional[int] = 8081,
+    cpp_server_endpoint: Optional[str] = None,
+    cpp_server_host: str = "localhost",
+    cpp_server_port: Optional[int] = 8081,
 ):
     import uvicorn
 
     metadata = GGUFKeyValues(model)
     context_length = metadata[Keys.LLM.CONTEXT_LENGTH]
 
     chat_format = ChatFormat.from_gguf(metadata)
-    print(chat_format)
+    # print(chat_format)
 
-    if not main_server_endpoint:
+    if not cpp_server_endpoint:
+        sys.stderr.write(f"# Starting C++ server with model {model} on {cpp_server_host}:{cpp_server_port}\n")
         server_process = subprocess.Popen([
             "./server", "-m", model,
-            "--host", main_server_host, "--port", f'{main_server_port}',
+            "--host", cpp_server_host, "--port", f'{cpp_server_port}',
             '-ctk', 'q4_0', '-ctv', 'f16',
             "-c", f"{2*8192}",
             # "-c", f"{context_length}",
-        ])
+        ], stdout=sys.stderr)
         atexit.register(server_process.kill)
-        main_server_endpoint = f"http://{main_server_host}:{main_server_port}"
+        cpp_server_endpoint = f"http://{cpp_server_host}:{cpp_server_port}"
 
     app = FastAPI()
 
@@ -74,21 +75,17 @@ def main(
             (grammar, parser) = make_grammar(chat_format, chat_request.tools, response_schema)
 
             # TODO: Test whether the template supports formatting tool_calls
-
+            sys.stderr.write(f'\n{grammar}\n\n')
+
             prompt = chat_format.render(messages, add_generation_prompt=True)
 
-            print(json.dumps(dict(
-                stream=chat_request.stream,
-                prompt=prompt,
-                # grammar=grammar,
-            ), indent=2))
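+            # The llama.cpp server's raw /completions endpoint accepts a 'grammar' field;
+            # sending the GBNF built above constrains sampling so the model can only emit
+            # well-formed content and/or tool calls.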
 
             async with httpx.AsyncClient() as client:
                 response = await client.post(
-                    f"{main_server_endpoint}/completions",
+                    f"{cpp_server_endpoint}/completions",
                     json=LlamaCppServerCompletionRequest(
                         prompt=prompt,
                         stream=chat_request.stream,
                         n_predict=300,
-                        # grammar=grammar,
+                        grammar=grammar,
                     ).model_dump(),
                     headers=headers,
                     timeout=None)
 
@@ -103,7 +100,7 @@ def main(
                 #     print(json.dumps(result, indent=2))
                     return JSONResponse(result)
 
-                print(json.dumps(result, indent=2))
+                sys.stderr.write(json.dumps(result, indent=2) + "\n")
                 # print(json.dumps(result.get('content'), indent=2))
 
                 message = parser(result["content"])
                 assert message is not None, f"Failed to parse response:\n{response.text}\n\n"
@@ -118,7 +115,6 @@ def main(
                     choices=[Choice(
                         index=0,
                         message=message,
-
                         finish_reason="stop" if message.tool_calls is None else "tool_calls",
                     )],
                     usage=Usage(
diff --git a/examples/openai/test.sh b/examples/openai/test.sh
index 7dcc93e45..3f5d38cd1 100755
--- a/examples/openai/test.sh
+++ b/examples/openai/test.sh
@@ -4,23 +4,60 @@ set -euo pipefail
 SERVER_PID=""
 function cleanup() {
     if [ -n "$SERVER_PID" ]; then
-        echo "# Killing server"
+        echo "# Killing server" >&2
         kill $SERVER_PID
         wait $SERVER_PID
     fi
 }
 trap cleanup EXIT
 
-echo "# Starting the server"
+echo "# Starting the server" >&2
 
-python -m examples.openai --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf &
-# python -m examples.openai --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q8_0.gguf &
-# python -m examples.openai --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q8_0.gguf &
+args=(
+    # --cpp_server_endpoint "http://localhost:8081"
+
+    # --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
+
+    # --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q8_0.gguf
+    # --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
+
+    # --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q8_0.gguf
+    --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf
+)
+python -m examples.openai "${args[@]}" &
 SERVER_PID=$!
 
 sleep 5
 
-echo "# Send a message to the chat API"
+echo "# Send a message to the chat API" >&2
+
+# curl http://localhost:8080/v1/chat/completions \
+#   -H "Content-Type: application/json" \
+#   -H "Authorization: Bearer $OPENAI_API_KEY" \
+#   -d '{
+#     "model": "gpt-3.5-turbo",
+#     "tools": [{
+#       "type": "function",
+#       "function": {
+#         "name": "get_current_weather",
+#         "description": "Get the current weather",
+#         "parameters": {
+#           "type": "object",
+#           "properties": {
+#             "location": {
+#               "type": "string",
+#               "description": "The city and state, e.g. San Francisco, CA"
+#             }
+#           },
+#           "required": ["location"]
+#         }
+#       }
+#     }],
+#     "messages": [
+#       {"role": "user", "content": "I live in the UK. what is the weather going to be like in San Francisco and Glasgow over the next 4 days."}
+#     ]
+#   }' | \
+#   jq .
 
 curl http://localhost:8080/v1/chat/completions \
   -H "Content-Type: application/json" \
@@ -77,6 +114,7 @@ curl http://localhost:8080/v1/chat/completions \
     "messages": [
       {"role": "user", "content": "I live in the UK. what is the weather going to be like in San Francisco and Glasgow over the next 4 days."}
     ]
-  }'
+  }' | \
+  jq .
 
 # {"role": "system", "content": "Do not make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."},
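+# Illustrative shape of a successful tool-call response (not captured output; actual
+# values depend on the model):
+#   {"choices": [{"index": 0, "finish_reason": "tool_calls",
+#     "message": {"role": "assistant", "content": null, "tool_calls": [{"id": "...",
+#       "function": {"name": "get_current_weather", "arguments": {"location": "Glasgow"}}}]}}],
+#    "usage": {...}}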