server: tests: fix concurrent OAI streaming request
This commit is contained in:
parent
77b8589dbb
commit
71831494b1
2 changed files with 19 additions and 14 deletions
|
@@ -51,7 +51,7 @@ Feature: Parallel
     Examples:
       | streaming | n_predict |
       | disabled  | 128       |
-      #| enabled   | 64        | FIXME: phymbert: need to investigate why in aiohttp with streaming only one token is generated
+      | enabled   | 64        |

   Scenario: Multi users with total number of tokens to predict exceeds the KV Cache size #3969
     Given a prompt:
|
|
@@ -485,20 +485,25 @@ async def oai_chat_completions(user_prompt,
             assert response.status == 200
             assert response.headers['Access-Control-Allow-Origin'] == origin
             assert response.headers['Content-Type'] == "text/event-stream"
-            event_received = True
-            while event_received:
-                event_received = False
-                async for line_in_bytes in response.content:
-                    line = line_in_bytes.decode('utf8')
-                    line = line.rstrip('\n').rstrip('\r')
-                    if line == '':
-                        continue
-                    event_data = line.split(': ', 1)
-                    assert event_data[0] == 'data', f'Bad event code received: ```{event_data}```'
-                    chunk_raw = event_data[1]
-
-                    chunk = json.loads(chunk_raw)
-                    assert len(chunk['choices']) == 1
-                    delta = chunk['choices'][0]['delta']
-                    if 'content' in delta:
-                        completion_response['content'] += delta['content']
-                        completion_response['timings']['predicted_n'] += 1
-                        print(f"DEBUG completion_response: {completion_response}")
+            async for line_in_bytes in response.content:
+                line = line_in_bytes.decode('utf8')
+                event_data = line.split(': ', 1)
+                assert event_data[0] == 'data', f'{event_data}'
+                chunk_raw = event_data[1]
+
+                chunk = json.loads(chunk_raw)
+                assert len(chunk['choices']) == 1, f"no choices provided, line ```{line}```"
+                delta = chunk['choices'][0]['delta']
+                if 'content' in delta:
+                    completion_response['content'] += delta['content']
+                    completion_response['timings']['predicted_n'] += 1
+                    print(f"DEBUG completion_response: {completion_response}")
         else:
             if expect_api_error is None or not expect_api_error:
                 assert response.status == 200
|
Loading…
Add table
Add a link
Reference in a new issue