server: tests: parallel: fix server being started twice, add colors to help monitor the CI jobs
parent 530d3ae4c4
commit 36ddb962d8
3 changed files with 3 additions and 9 deletions
```diff
@@ -8,6 +8,7 @@ from signal import SIGKILL
 
 
 def before_scenario(context, scenario):
+    print(f"\x1b[33;42mStarting new scenario: {scenario.name}!\x1b[0m")
     port = 8080
     if 'PORT' in os.environ:
         port = int(os.environ['PORT'])
```
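The added print wraps the scenario name in an ANSI SGR escape sequence so each scenario boundary stands out in the CI log: `\x1b[33;42m` selects a yellow foreground (33) on a green background (42), and `\x1b[0m` resets all attributes. A small standalone sketch of the same idea, with a hypothetical helper name and color table that are not part of the repository:

```python
# Hypothetical helper for coloring CI log messages with ANSI SGR codes.
# The color choices mirror the ones used in this commit.
COLORS = {
    "scenario": "\x1b[33;42m",  # yellow text on a green background
    "server":   "\x1b[33;46m",  # yellow text on a cyan background
    "reset":    "\x1b[0m",      # reset all attributes
}


def colorize(kind: str, message: str) -> str:
    """Wrap a message in the SGR sequence for the given kind of event."""
    return f"{COLORS[kind]}{message}{COLORS['reset']}"


if __name__ == "__main__":
    print(colorize("scenario", "Starting new scenario: Parallel requests!"))
    print(colorize("server", "server started!"))
```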
```diff
@@ -50,17 +50,10 @@ Feature: Parallel
     Then all prompts are predicted with <n_predict> tokens
 
     Examples:
       | streaming | n_predict |
       | disabled  | 64        |
       | disabled  | 128       |
       #| enabled  | 64        | FIXME: phymbert: need to investigate why in aiohttp with streaming only one token is generated
 
   Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size #3969
-    Given a server listening on localhost:8080
-    And a model file stories260K.gguf
-    And 42 as server seed
-    And 2 slots
-    And 64 KV cache size
-    Then the server is starting
-    Then the server is healthy
     Given a prompt:
       """
       Write a very long story about AI.
```
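The scenario is driven by behave, which maps each Gherkin step to a Python step definition and shares state through the `context` object; dropping the duplicated server-setup steps from this scenario is consistent with the commit's goal of not starting the server twice. A minimal sketch of how such parameterized step definitions typically look (the step wording and attribute names are assumptions for illustration, not the repository's actual steps.py):

```python
# Illustrative behave step definitions; not the repository's actual steps.py.
from behave import step


@step('{n_slots:d} slots')
def step_n_slots(context, n_slots):
    # Remember how many parallel slots the server should expose.
    context.n_slots = n_slots


@step('{n_ctx:d} KV cache size')
def step_kv_cache_size(context, n_ctx):
    # Remember the total KV cache size shared by all slots.
    context.n_ctx = n_ctx
```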
```diff
@@ -88,7 +88,7 @@ def step_start_server(context):
     with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
         result = sock.connect_ex((context.server_fqdn, context.server_port))
         if result == 0:
-            print("\x1b[33;42mserver started!\x1b[0m")
+            print("\x1b[33;46mserver started!\x1b[0m")
             return
         attempts += 1
         if attempts > 20:
```
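The only functional change in this hunk is the SGR background code (42, green, becomes 46, cyan), so the "server started" message is visually distinct from the scenario banner; the surrounding code is a plain TCP port poll using `connect_ex`, which returns an errno-style code instead of raising. A self-contained sketch of the same waiting pattern, with a hypothetical helper name and retry/delay values chosen for illustration:

```python
import socket
import time
from contextlib import closing


def wait_for_port(host: str, port: int, max_attempts: int = 20, delay: float = 0.5) -> bool:
    """Poll a TCP port until something accepts connections or the retry budget runs out."""
    attempts = 0
    while True:
        with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
            # connect_ex returns 0 when the connection succeeded.
            if sock.connect_ex((host, port)) == 0:
                # Yellow on cyan, then reset attributes.
                print("\x1b[33;46mserver started!\x1b[0m")
                return True
        attempts += 1
        if attempts > max_attempts:
            return False
        time.sleep(delay)
```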