server.py: raise n_predict
This commit is contained in:
parent
a4062935a5
commit
63a384deaf
3 changed files with 6 additions and 4 deletions
|
@@ -323,7 +323,9 @@ def make_grammar(chat_format: ChatFormat, tools: list[Tool], response_schema: Op
|
|||
]
|
||||
|
||||
not_from_rule = converter._add_rule('not_from', converter.not_literal("<|from|>"))
|
||||
-content_without_start_rule = converter._add_rule('content_without_start', converter._format_literal("all\n<|content|>") + ' ' + not_from_rule + '*')
|
||||
+content_without_start_rule = converter._add_rule(
|
||||
+'content_without_start',
|
||||
+converter._format_literal("all\n<|content|>") + ' ' + not_from_rule + '*')
|
||||
start_rule = converter._add_rule('start', converter._format_literal('<|from|>assistant\n<|recipient|>'))
|
||||
content_rule = converter._add_rule('content', start_rule + ' ' + content_without_start_rule)
|
||||
tool_call_without_start_rule = converter._add_rule(
|
||||
|
|
|
@@ -84,7 +84,7 @@ def main(
|
|||
json=LlamaCppServerCompletionRequest(
|
||||
prompt=prompt,
|
||||
stream=chat_request.stream,
|
||||
-n_predict=300,
|
||||
+n_predict=1000,
|
||||
grammar=grammar,
|
||||
).model_dump(),
|
||||
headers=headers,
|
||||
|
|
|
@@ -16,13 +16,13 @@ echo "# Starting the server" >&2
|
|||
args=(
|
||||
# --cpp_server_endpoint "http://localhost:8081"
|
||||
|
||||
-# --model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
|
||||
+--model ~/AI/Models/functionary-medium-v2.2.q4_0.gguf
|
||||
|
||||
# --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q8_0.gguf
|
||||
# --model ~/AI/Models/mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf
|
||||
|
||||
# --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q8_0.gguf
|
||||
---model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf
|
||||
+# --model ~/AI/Models/Hermes-2-Pro-Mistral-7B.Q4_K_M.gguf
|
||||
)
|
||||
python -m examples.openai "${args[@]}" &
|
||||
SERVER_PID=$!
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue