no cache_prompt for some tests

Xuan Son Nguyen 2024-11-26 15:57:42 +01:00
parent 71fc0f158d
commit 217c9e4215
3 changed files with 11 additions and 3 deletions

@@ -29,7 +29,7 @@ It's possible to override some scenario steps values with environment variables:
 |--------------------------|------------------------------------------------------------------------------------------------|
 | `PORT`                   | `context.server_port` to set the listening port of the server during scenario, default: `8080`  |
 | `LLAMA_SERVER_BIN_PATH`  | to change the server binary path, default: `../../../build/bin/llama-server`                    |
-| `DEBUG`                  | "ON" to enable steps and server verbose mode `--verbose`                                        |
+| `DEBUG`                  | to enable steps and server verbose mode `--verbose`                                             |
 | `N_GPU_LAYERS`           | number of model layers to offload to VRAM `-ngl --n-gpu-layers`                                 |
 
 To run slow tests:
@@ -41,7 +41,7 @@ SLOW_TESTS=1 ./tests.sh
 
 To run with stdout/stderr display in real time (verbose output, but useful for debugging):
 ```shell
-./tests.sh -s -v -x
+DEBUG=1 ./tests.sh -s -v -x
 ```
 
 To see all available arguments, please refer to [pytest documentation](https://docs.pytest.org/en/stable/how-to/usage.html)
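Note that the table edit above also changes the contract for `DEBUG`: it no longer has to be set to `"ON"`; merely being present in the environment enables verbose mode, matching the `__init__` change further down. A minimal sketch of the two conventions side by side (standard library only):

```python
# Sketch of the DEBUG convention change implied by this commit.
import os

debug_old = os.environ.get("DEBUG") == "ON"  # old convention: value had to be "ON"
debug_new = "DEBUG" in os.environ            # new convention: any value enables it
print(debug_old, debug_new)
```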

@@ -62,6 +62,7 @@ def test_consistent_result_same_seed(n_slots: int):
         "prompt": "I believe the meaning of life is",
         "seed": 42,
         "temperature": 1.0,
+        "cache_prompt": False,
     })
     if last_res is not None:
         assert res.body["content"] == last_res.body["content"]
@@ -79,6 +80,7 @@ def test_different_result_different_seed(n_slots: int):
         "prompt": "I believe the meaning of life is",
         "seed": seed,
         "temperature": 1.0,
+        "cache_prompt": False,
     })
     if last_res is not None:
         assert res.body["content"] != last_res.body["content"]
@@ -97,6 +99,7 @@ def test_consistent_result_different_batch_size(n_batch: int, temperature: float):
         "prompt": "I believe the meaning of life is",
         "seed": 42,
         "temperature": temperature,
+        "cache_prompt": False,
     })
     if last_res is not None:
         assert res.body["content"] == last_res.body["content"]
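All three hunks add `"cache_prompt": False` so that each request re-evaluates the prompt from scratch instead of reusing cached KV state from a previous identical prompt; without this, cached results could mask the very determinism properties the tests check. A self-contained sketch of the request shape these tests exercise, assuming a llama-server instance listening on the README's default port; the helper name `make_completion` is illustrative, not from the test suite:

```python
# Hypothetical reproduction of the request the determinism tests send.
# `make_completion` and the URL are assumed names, not part of the test suite.
import requests

def make_completion(seed: int) -> str:
    res = requests.post("http://localhost:8080/completion", json={
        "prompt": "I believe the meaning of life is",
        "seed": seed,
        "temperature": 1.0,
        "cache_prompt": False,  # force full prompt re-evaluation on every request
    })
    res.raise_for_status()
    return res.json()["content"]

# Same seed with the cache off: generations should match exactly.
assert make_completion(42) == make_completion(42)
# Different seeds at temperature 1.0: generations should differ.
assert make_completion(42) != make_completion(43)
```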

@@ -74,7 +74,12 @@ class ServerProcess:
     process: subprocess.Popen | None = None
 
     def __init__(self):
-        pass
+        if "N_GPU_LAYERS" in os.environ:
+            self.n_gpu_layer = int(os.environ["N_GPU_LAYERS"])
+        if "DEBUG" in os.environ:
+            self.debug = True
+        if "PORT" in os.environ:
+            self.server_port = int(os.environ["PORT"])
 
     def start(self, timeout_seconds: int = 10) -> None:
         if "LLAMA_SERVER_BIN_PATH" in os.environ: