diff --git a/examples/openai/prompting.py b/examples/openai/prompting.py
index d8ecd5174..ab3ce89a8 100644
--- a/examples/openai/prompting.py
+++ b/examples/openai/prompting.py
@@ -323,7 +323,9 @@ def make_grammar(chat_format: ChatFormat, tools: list[Tool], response_schema: Op
         ]
 
         not_from_rule = converter._add_rule('not_from', converter.not_literal("<|from|>"))
-        content_without_start_rule = converter._add_rule('content_without_start', converter._format_literal("all\n<|content|>") + ' ' + not_from_rule + '*')
+        content_without_start_rule = converter._add_rule(
+            'content_without_start',
+            converter._format_literal("all\n<|content|>") + ' ' + not_from_rule + '*')
         start_rule = converter._add_rule('start', converter._format_literal('<|from|>assistant\n<|recipient|>'))
         content_rule = converter._add_rule('content', start_rule + ' ' + content_without_start_rule)
         tool_call_without_start_rule = converter._add_rule(
diff --git a/examples/openai/server.py b/examples/openai/server.py
index 349ad6c7d..21c30623d 100644
--- a/examples/openai/server.py
+++ b/examples/openai/server.py
@@ -84,7 +84,7 @@ def main(
                 json=LlamaCppServerCompletionRequest(
                     prompt=prompt,
                     stream=chat_request.stream,
-                    n_predict=300,
+                    n_predict=1000,
                     grammar=grammar,
                 ).model_dump(),
                 headers=headers,
diff --git a/examples/openai/test.sh b/examples/openai/test.sh
index 3f5d38cd1..44a6c44de 100755
--- a/examples/openai/test.sh
+++ b/examples/openai/test.sh
@@ -16,13 +16,13 @@ echo "# Starting the server" >&2
 args=(
     # --cpp_server_endpoint "http://localhost:8081"
 
-    # --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
+    --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
 
     # --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q8_0.gguf
     # --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
     # --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q8_0.gguf
-    --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf
+    # --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf
 )
 
 python -m examples.openai "${args[@]}" &
 SERVER_PID=$!