diff --git a/examples/server/tests/features/parallel.feature b/examples/server/tests/features/parallel.feature index 6cd306a2b..96a96d6f8 100644 --- a/examples/server/tests/features/parallel.feature +++ b/examples/server/tests/features/parallel.feature @@ -52,8 +52,8 @@ Feature: Parallel Then all prompts are predicted with tokens Examples: | streaming | n_predict | - | disabled | 128 | - | enabled | 64 | + | disabled | 200 | + | enabled | 200 | Scenario Outline: Multi users OAI completions compatibility no v1 Given a system prompt You are a writer. diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 1ba7b60b6..1864a694f 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -818,7 +818,7 @@ async def concurrent_requests(context, f_completion, *args, **kwargs): for prompt_no in range(context.n_prompts): shifted_args = [context.prompts.pop(), seeds[prompt_no], *args] context.concurrent_tasks.append(asyncio.create_task(f_completion(*shifted_args, **kwargs))) - await asyncio.sleep(0.1) + await asyncio.sleep(0.01) @step('the slot {slot_id:d} is saved with filename "{filename}"')