server: tests: add infinite loop scenario
parent 6b9dc4f291
commit 68574c6f98
2 changed files with 3 additions and 3 deletions
@@ -42,7 +42,7 @@ Feature: llama.cpp server
       """
       Write another very long music lyrics.
       """
-    And 512 max tokens to predict
+    And 256 max tokens to predict
     Given concurrent completion requests
     Then the server is busy
     And all slots are busy
@@ -62,7 +62,7 @@ Feature: llama.cpp server
       """
       Write another very long music lyrics.
       """
-    And 512 max tokens to predict
+    And 256 max tokens to predict
     And streaming is enabled
     Given concurrent OAI completions requests
     Then the server is busy
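The feature steps above cap generation at 256 max tokens to predict. As a minimal sketch, assuming the behave framework, such a step could bind the number to `context.n_predict`, which the request helper in the next hunk passes as `max_tokens`; the decorator pattern and function name here are illustrative assumptions, not the repository's exact code.

    # Hedged sketch, assuming behave: the step pattern and helper name are
    # assumptions for illustration; only context.n_predict comes from the diff.
    from behave import step


    @step('{n_predict:d} max tokens to predict')
    def step_n_predict(context, n_predict):
        # Store the limit on the shared behave context so request helpers
        # (such as oai_chat_completions below) can pass it as max_tokens.
        context.n_predict = n_predict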
@@ -176,7 +176,7 @@ def oai_chat_completions(context, user_prompt):
         model=context.model,
         max_tokens=context.n_predict,
         stream=context.enable_streaming,
-        seed = context.seed
+        seed=context.seed
     )
     if context.enable_streaming:
         completion_response = {
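The changed line simply passes `seed` as a keyword argument, matching the style of the other parameters in the chat completion call. For reference, a minimal standalone sketch of an equivalent request, assuming the openai>=1.0 Python client and a local OpenAI-compatible endpoint; the base URL, API key, model name, and seed value are placeholders, not values from the repository.

    from openai import OpenAI

    # Assumptions: openai>=1.0 client against a local OpenAI-compatible server;
    # base_url, api_key, and model are placeholders, not repository values.
    client = OpenAI(base_url="http://localhost:8080/v1", api_key="sk-no-key-required")

    response = client.chat.completions.create(
        model="local-model",
        messages=[{"role": "user", "content": "Write another very long music lyrics."}],
        max_tokens=256,  # mirrors the lowered limit in the feature file
        stream=False,
        seed=42,         # seed passed as a keyword argument, as in the fix
    )
    print(response.choices[0].message.content)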