server.py: raise n_predict

This commit is contained in:
ochafik 2024-03-28 00:42:12 +00:00
parent a4062935a5
commit 63a384deaf
3 changed files with 6 additions and 4 deletions

View file

@@ -323,7 +323,9 @@ def make_grammar(chat_format: ChatFormat, tools: list[Tool], response_schema: Op
]
not_from_rule = converter._add_rule('not_from', converter.not_literal("<|from|>"))
content_without_start_rule = converter._add_rule('content_without_start', converter._format_literal("all\n<|content|>") + ' ' + not_from_rule + '*')
content_without_start_rule = converter._add_rule(
'content_without_start',
converter._format_literal("all\n<|content|>") + ' ' + not_from_rule + '*')
start_rule = converter._add_rule('start', converter._format_literal('<|from|>assistant\n<|recipient|>'))
content_rule = converter._add_rule('content', start_rule + ' ' + content_without_start_rule)
tool_call_without_start_rule = converter._add_rule(

View file

@@ -84,7 +84,7 @@ def main(
json=LlamaCppServerCompletionRequest(
prompt=prompt,
stream=chat_request.stream,
n_predict=300,
n_predict=1000,
grammar=grammar,
).model_dump(),
headers=headers,

View file

@@ -16,13 +16,13 @@ echo "# Starting the server" >&2
args=(
# --cpp_server_endpoint "http://localhost:8081"
# --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
--model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
# --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q8_0.gguf
# --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
# --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q8_0.gguf
--model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf
# --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf
)
python -m examples.openai "${args[@]}" &
SERVER_PID=$!