diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md index b4c6adfd3..2930a2e0d 100644 --- a/examples/server/tests/README.md +++ b/examples/server/tests/README.md @@ -29,7 +29,7 @@ It's possible to override some scenario steps values with environment variables: |--------------------------|------------------------------------------------------------------------------------------------| | `PORT` | `context.server_port` to set the listening port of the server during scenario, default: `8080` | | `LLAMA_SERVER_BIN_PATH` | to change the server binary path, default: `../../../build/bin/llama-server` | -| `DEBUG` | "ON" to enable steps and server verbose mode `--verbose` | +| `DEBUG` | to enable steps and server verbose mode `--verbose` | | `N_GPU_LAYERS` | number of model layers to offload to VRAM `-ngl --n-gpu-layers` | To run slow tests: @@ -41,7 +41,7 @@ SLOW_TESTS=1 ./tests.sh To run with stdout/stderr display in real time (verbose output, but useful for debugging): ```shell -./tests.sh -s -v -x +DEBUG=1 ./tests.sh -s -v -x ``` To see all available arguments, please refer to [pytest documentation](https://docs.pytest.org/en/stable/how-to/usage.html) diff --git a/examples/server/tests/unit/test_completion.py b/examples/server/tests/unit/test_completion.py index 16a48186c..926f25c9f 100644 --- a/examples/server/tests/unit/test_completion.py +++ b/examples/server/tests/unit/test_completion.py @@ -62,6 +62,7 @@ def test_consistent_result_same_seed(n_slots: int): "prompt": "I believe the meaning of life is", "seed": 42, "temperature": 1.0, + "cache_prompt": False, }) if last_res is not None: assert res.body["content"] == last_res.body["content"] @@ -79,6 +80,7 @@ def test_different_result_different_seed(n_slots: int): "prompt": "I believe the meaning of life is", "seed": seed, "temperature": 1.0, + "cache_prompt": False, }) if last_res is not None: assert res.body["content"] != last_res.body["content"] @@ -97,6 +99,7 @@ def test_consistent_result_different_batch_size(n_batch: int, temperature: float "prompt": "I believe the meaning of life is", "seed": 42, "temperature": temperature, + "cache_prompt": False, }) if last_res is not None: assert res.body["content"] == last_res.body["content"] diff --git a/examples/server/tests/utils.py b/examples/server/tests/utils.py index 75ada2913..bc590bcb3 100644 --- a/examples/server/tests/utils.py +++ b/examples/server/tests/utils.py @@ -74,7 +74,12 @@ class ServerProcess: process: subprocess.Popen | None = None def __init__(self): - pass + if "N_GPU_LAYERS" in os.environ: + self.n_gpu_layer = int(os.environ["N_GPU_LAYERS"]) + if "DEBUG" in os.environ: + self.debug = True + if "PORT" in os.environ: + self.server_port = int(os.environ["PORT"]) def start(self, timeout_seconds: int = 10) -> None: if "LLAMA_SERVER_BIN_PATH" in os.environ: