server: tests: parallel: fix the server being started twice, and add colors to help monitor the CI jobs
This commit is contained in:
parent
530d3ae4c4
commit
36ddb962d8
3 changed files with 3 additions and 9 deletions
|
@ -8,6 +8,7 @@ from signal import SIGKILL
|
||||||
|
|
||||||
|
|
||||||
def before_scenario(context, scenario):
|
def before_scenario(context, scenario):
|
||||||
|
print(f"\x1b[33;42mStarting new scenario: {scenario.name}!\x1b[0m")
|
||||||
port = 8080
|
port = 8080
|
||||||
if 'PORT' in os.environ:
|
if 'PORT' in os.environ:
|
||||||
port = int(os.environ['PORT'])
|
port = int(os.environ['PORT'])
|
||||||
|
|
|
@ -50,17 +50,10 @@ Feature: Parallel
|
||||||
Then all prompts are predicted with <n_predict> tokens
|
Then all prompts are predicted with <n_predict> tokens
|
||||||
Examples:
|
Examples:
|
||||||
| streaming | n_predict |
|
| streaming | n_predict |
|
||||||
| disabled | 64 |
|
| disabled | 128 |
|
||||||
#| enabled | 64 | FIXME: phymbert: need to investigate why in aiohttp with streaming only one token is generated
|
#| enabled | 64 | FIXME: phymbert: need to investigate why in aiohttp with streaming only one token is generated
|
||||||
|
|
||||||
Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size #3969
|
Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size #3969
|
||||||
Given a server listening on localhost:8080
|
|
||||||
And a model file stories260K.gguf
|
|
||||||
And 42 as server seed
|
|
||||||
And 2 slots
|
|
||||||
And 64 KV cache size
|
|
||||||
Then the server is starting
|
|
||||||
Then the server is healthy
|
|
||||||
Given a prompt:
|
Given a prompt:
|
||||||
"""
|
"""
|
||||||
Write a very long story about AI.
|
Write a very long story about AI.
|
||||||
|
|
|
@ -88,7 +88,7 @@ def step_start_server(context):
|
||||||
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
|
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
|
||||||
result = sock.connect_ex((context.server_fqdn, context.server_port))
|
result = sock.connect_ex((context.server_fqdn, context.server_port))
|
||||||
if result == 0:
|
if result == 0:
|
||||||
print("\x1b[33;42mserver started!\x1b[0m")
|
print("\x1b[33;46mserver started!\x1b[0m")
|
||||||
return
|
return
|
||||||
attempts += 1
|
attempts += 1
|
||||||
if attempts > 20:
|
if attempts > 20:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue