no cache_prompt for some tests
parent 71fc0f158d · commit 217c9e4215

3 changed files with 11 additions and 3 deletions
```diff
@@ -29,7 +29,7 @@ It's possible to override some scenario steps values with environment variables:
 |--------------------------|--------------------------------------------------------------------------------------------------|
 | `PORT`                   | `context.server_port` to set the listening port of the server during scenario, default: `8080`    |
 | `LLAMA_SERVER_BIN_PATH`  | to change the server binary path, default: `../../../build/bin/llama-server`                      |
-| `DEBUG`                  | "ON" to enable steps and server verbose mode `--verbose`                                           |
+| `DEBUG`                  | to enable steps and server verbose mode `--verbose`                                                |
 | `N_GPU_LAYERS`           | number of model layers to offload to VRAM `-ngl --n-gpu-layers`                                    |
 
 To run slow tests:
```
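For reference, the override mechanism amounts to plain environment lookups with the defaults listed in the table. A minimal Python sketch of the idea (the actual resolution lives in the test harness; the variable names and the `N_GPU_LAYERS` default of 0 here are only illustrative):

```python
import os

# Defaults taken from the table above; overridden when the variable is set.
server_port = int(os.environ.get("PORT", "8080"))
server_bin = os.environ.get("LLAMA_SERVER_BIN_PATH", "../../../build/bin/llama-server")
debug = "DEBUG" in os.environ  # presence of DEBUG enables verbose mode
n_gpu_layers = int(os.environ.get("N_GPU_LAYERS", "0"))  # assumed default of 0
```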
````diff
@@ -41,7 +41,7 @@ SLOW_TESTS=1 ./tests.sh
 To run with stdout/stderr display in real time (verbose output, but useful for debugging):
 
 ```shell
-./tests.sh -s -v -x
+DEBUG=1 ./tests.sh -s -v -x
 ```
 
 To see all available arguments, please refer to [pytest documentation](https://docs.pytest.org/en/stable/how-to/usage.html)
````
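The `-s -v -x` flags are standard pytest options: `-s` disables output capture so server logs stream in real time, `-v` prints each test name, and `-x` stops at the first failure. The same flags can also be passed programmatically via `pytest.main`, which can be convenient when debugging from an IDE (a sketch, not part of the repository):

```python
import sys
import pytest

# Equivalent to `./tests.sh -s -v -x` minus the wrapper script's setup:
# -s: stream stdout/stderr, -v: verbose test names, -x: stop on first failure.
sys.exit(pytest.main(["-s", "-v", "-x"]))
```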
```diff
@@ -62,6 +62,7 @@ def test_consistent_result_same_seed(n_slots: int):
             "prompt": "I believe the meaning of life is",
             "seed": 42,
             "temperature": 1.0,
+            "cache_prompt": False,
         })
         if last_res is not None:
             assert res.body["content"] == last_res.body["content"]
```
```diff
@@ -79,6 +80,7 @@ def test_different_result_different_seed(n_slots: int):
             "prompt": "I believe the meaning of life is",
             "seed": seed,
             "temperature": 1.0,
+            "cache_prompt": False,
         })
         if last_res is not None:
             assert res.body["content"] != last_res.body["content"]
```
```diff
@@ -97,6 +99,7 @@ def test_consistent_result_different_batch_size(n_batch: int, temperature: float):
             "prompt": "I believe the meaning of life is",
             "seed": 42,
             "temperature": temperature,
+            "cache_prompt": False,
         })
         if last_res is not None:
             assert res.body["content"] == last_res.body["content"]
```
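All three determinism tests above now send `"cache_prompt": False`, so each request starts from a fresh KV cache rather than reusing a previously evaluated prompt prefix, which could otherwise affect the reproducibility of the comparison between runs. A standalone reproduction of the same request might look like the following sketch (assuming a `llama-server` listening on the README's default port `8080`; the tests themselves go through their own request helper):

```python
import requests  # third-party HTTP client, used here only for illustration

res = requests.post("http://localhost:8080/completion", json={
    "prompt": "I believe the meaning of life is",
    "seed": 42,
    "temperature": 1.0,
    "cache_prompt": False,  # do not reuse a cached prompt prefix for this request
})
print(res.json()["content"])
```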
```diff
@@ -74,7 +74,12 @@ class ServerProcess:
     process: subprocess.Popen | None = None
 
     def __init__(self):
-        pass
+        if "N_GPU_LAYERS" in os.environ:
+            self.n_gpu_layer = int(os.environ["N_GPU_LAYERS"])
+        if "DEBUG" in os.environ:
+            self.debug = True
+        if "PORT" in os.environ:
+            self.server_port = int(os.environ["PORT"])
 
     def start(self, timeout_seconds: int = 10) -> None:
         if "LLAMA_SERVER_BIN_PATH" in os.environ:
```
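Since `__init__` now reads the environment at construction time, the overrides can be exercised per test with pytest's `monkeypatch` fixture. A hedged sketch (the import path for `ServerProcess` is an assumption):

```python
import pytest
from utils import ServerProcess  # assumed import path for the tests' utils module

def test_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setenv("PORT", "8081")
    monkeypatch.setenv("N_GPU_LAYERS", "23")
    server = ServerProcess()  # environment variables are read in __init__
    assert server.server_port == 8081
    assert server.n_gpu_layer == 23
```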