diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index 6e54395b6..681025cf5 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -42,7 +42,7 @@ Feature: llama.cpp server
       """
       Write another very long music lyrics.
       """
-    And 512 max tokens to predict
+    And 256 max tokens to predict
     Given concurrent completion requests
     Then the server is busy
     And all slots are busy
@@ -62,7 +62,7 @@ Feature: llama.cpp server
       """
       Write another very long music lyrics.
       """
-    And 512 max tokens to predict
+    And 256 max tokens to predict
     And streaming is enabled
     Given concurrent OAI completions requests
     Then the server is busy
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 896d8e32d..75e893afa 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -176,7 +176,7 @@ def oai_chat_completions(context, user_prompt):
         model=context.model,
         max_tokens=context.n_predict,
         stream=context.enable_streaming,
-        seed = context.seed
+        seed=context.seed
     )
     if context.enable_streaming:
         completion_response = {