server: tests: add infinite loop
This commit is contained in:
parent
0772884b06
commit
6b9dc4f291
2 changed files with 30 additions and 19 deletions
|
@ -36,24 +36,13 @@ Feature: llama.cpp server
|
|||
Scenario: Multi users
|
||||
Given a prompt:
|
||||
"""
|
||||
Write a formal complaint email to Air France about my delayed
|
||||
baggage from my flight on Tuesday, January 17th, from Paris to Toulouse. Be verbose.
|
||||
Write a very long story about AI.
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Translate the following War & Peace chapter into Russian: WELL, PRINCE,
|
||||
Genoa and Lucca are now no more than private estates of the Bonaparte
|
||||
family. No, I warn you, that if you do not tell me we are at war,
|
||||
if you again allow yourself to palliate all the infamies and atrocities
|
||||
of this Antichrist (upon my word, I believe he is), I don’t know you
|
||||
in future, you are no longer my friend, no longer my faithful slave,
|
||||
as you say. There, how do you do, how do you do? I see I’m scaring you,
|
||||
sit down and talk to me.” These words were uttered in July 1805 by
|
||||
Anna Pavlovna Scherer, a distinguished lady of the court,
|
||||
and confidential maid-of-honour to the Empress Marya Fyodorovna.
|
||||
It was her greeting to Prince Vassily, a man high in rank
|
||||
and office, who was the first to arrive at her soirée.
|
||||
Write another very long music lyrics.
|
||||
"""
|
||||
And 512 max tokens to predict
|
||||
Given concurrent completion requests
|
||||
Then the server is busy
|
||||
And all slots are busy
|
||||
|
@ -65,8 +54,6 @@ Feature: llama.cpp server
|
|||
Scenario: Multi users OAI Compatibility
|
||||
Given a system prompt "You are an AI assistant."
|
||||
And a model tinyllama-2
|
||||
And 1024 max tokens to predict
|
||||
And streaming is enabled
|
||||
Given a prompt:
|
||||
"""
|
||||
Write a very long story about AI.
|
||||
|
@ -75,6 +62,8 @@ Feature: llama.cpp server
|
|||
"""
|
||||
Write another very long music lyrics.
|
||||
"""
|
||||
And 512 max tokens to predict
|
||||
And streaming is enabled
|
||||
Given concurrent OAI completions requests
|
||||
Then the server is busy
|
||||
And all slots are busy
|
||||
|
@ -82,3 +71,25 @@ Feature: llama.cpp server
|
|||
And all slots are idle
|
||||
Then all prompts are predicted
|
||||
|
||||
# FIXME: this scenario runs into an infinite loop on the CI (but not locally) when n_prompt * n_predict > kv_size
|
||||
Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size
|
||||
Given a prompt:
|
||||
"""
|
||||
Write a very long story about AI.
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Write another very long music lyrics.
|
||||
"""
|
||||
And a prompt:
|
||||
"""
|
||||
Write a very long poem.
|
||||
"""
|
||||
And 1024 max tokens to predict
|
||||
Given concurrent completion requests
|
||||
Then the server is busy
|
||||
And all slots are busy
|
||||
Then the server is idle
|
||||
And all slots are idle
|
||||
Then all prompts are predicted
|
||||
|
||||
|
|
|
@ -105,7 +105,7 @@ def step_model(context, model):
|
|||
|
||||
@step(u'{max_tokens} max tokens to predict')
|
||||
def step_max_tokens(context, max_tokens):
|
||||
context.max_tokens = int(max_tokens)
|
||||
context.n_predict = int(max_tokens)
|
||||
|
||||
|
||||
@step(u'streaming is {enable_streaming}')
|
||||
|
@ -154,7 +154,7 @@ def concurrent_requests(context, f_completion):
|
|||
def request_completion(context, prompt, n_predict=None):
|
||||
response = requests.post(f'{context.base_url}/completion', json={
|
||||
"prompt": prompt,
|
||||
"n_predict": int(n_predict) if n_predict is not None else 4096,
|
||||
"n_predict": int(n_predict) if n_predict is not None else context.n_predict,
|
||||
"seed": context.seed
|
||||
})
|
||||
assert response.status_code == 200
|
||||
|
@ -174,7 +174,7 @@ def oai_chat_completions(context, user_prompt):
|
|||
}
|
||||
],
|
||||
model=context.model,
|
||||
max_tokens=context.max_tokens,
|
||||
max_tokens=context.n_predict,
|
||||
stream=context.enable_streaming,
|
||||
seed = context.seed
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue