no cache_prompt for some tests
parent 71fc0f158d · commit 217c9e4215

3 changed files with 11 additions and 3 deletions
```diff
@@ -29,7 +29,7 @@ It's possible to override some scenario steps values with environment variables:
 |--------------------------|--------------------------------------------------------------------------------------------------|
 | `PORT`                   | `context.server_port` to set the listening port of the server during scenario, default: `8080`    |
 | `LLAMA_SERVER_BIN_PATH`  | to change the server binary path, default: `../../../build/bin/llama-server`                      |
-| `DEBUG`                  | "ON" to enable steps and server verbose mode `--verbose`                                           |
+| `DEBUG`                  | to enable steps and server verbose mode `--verbose`                                                |
 | `N_GPU_LAYERS`           | number of model layers to offload to VRAM `-ngl --n-gpu-layers`                                    |
 
 To run slow tests:
```
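For reference, the override mechanism amounts to plain environment lookups with the defaults listed in the table. A minimal Python sketch of the idea (the actual resolution lives in the test harness; the variable names and the `N_GPU_LAYERS` default of 0 here are only illustrative):

```python
import os

# Defaults taken from the table above; overridden when the variable is set.
server_port = int(os.environ.get("PORT", "8080"))
server_bin = os.environ.get("LLAMA_SERVER_BIN_PATH", "../../../build/bin/llama-server")
debug = "DEBUG" in os.environ  # presence of DEBUG enables verbose mode
n_gpu_layers = int(os.environ.get("N_GPU_LAYERS", "0"))  # assumed default of 0
```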
````diff
@@ -41,7 +41,7 @@ SLOW_TESTS=1 ./tests.sh
 To run with stdout/stderr display in real time (verbose output, but useful for debugging):
 
 ```shell
-./tests.sh -s -v -x
+DEBUG=1 ./tests.sh -s -v -x
 ```
 
 To see all available arguments, please refer to [pytest documentation](https://docs.pytest.org/en/stable/how-to/usage.html)
````
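The `-s -v -x` flags are standard pytest options: `-s` disables output capture so server logs stream in real time, `-v` prints each test name, and `-x` stops at the first failure. The same flags can also be passed programmatically via `pytest.main`, which can be convenient when debugging from an IDE (a sketch, not part of the repository):

```python
import sys
import pytest

# Equivalent to `./tests.sh -s -v -x` minus the wrapper script's setup:
# -s: stream stdout/stderr, -v: verbose test names, -x: stop on first failure.
sys.exit(pytest.main(["-s", "-v", "-x"]))
```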
```diff
@@ -62,6 +62,7 @@ def test_consistent_result_same_seed(n_slots: int):
             "prompt": "I believe the meaning of life is",
             "seed": 42,
             "temperature": 1.0,
+            "cache_prompt": False,
         })
         if last_res is not None:
             assert res.body["content"] == last_res.body["content"]
```
```diff
@@ -79,6 +80,7 @@ def test_different_result_different_seed(n_slots: int):
             "prompt": "I believe the meaning of life is",
             "seed": seed,
             "temperature": 1.0,
+            "cache_prompt": False,
         })
         if last_res is not None:
             assert res.body["content"] != last_res.body["content"]
```
```diff
@@ -97,6 +99,7 @@ def test_consistent_result_different_batch_size(n_batch: int, temperature: float):
             "prompt": "I believe the meaning of life is",
             "seed": 42,
             "temperature": temperature,
+            "cache_prompt": False,
         })
         if last_res is not None:
             assert res.body["content"] == last_res.body["content"]
```
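All three determinism tests above now send `"cache_prompt": False`, so each request starts from a fresh KV cache rather than reusing a previously evaluated prompt prefix, which could otherwise affect the reproducibility of the comparison between runs. A standalone reproduction of the same request might look like the following sketch (assuming a `llama-server` listening on the README's default port `8080`; the tests themselves go through their own request helper):

```python
import requests  # third-party HTTP client, used here only for illustration

res = requests.post("http://localhost:8080/completion", json={
    "prompt": "I believe the meaning of life is",
    "seed": 42,
    "temperature": 1.0,
    "cache_prompt": False,  # do not reuse a cached prompt prefix for this request
})
print(res.json()["content"])
```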
```diff
@@ -74,7 +74,12 @@ class ServerProcess:
     process: subprocess.Popen | None = None
 
     def __init__(self):
-        pass
+        if "N_GPU_LAYERS" in os.environ:
+            self.n_gpu_layer = int(os.environ["N_GPU_LAYERS"])
+        if "DEBUG" in os.environ:
+            self.debug = True
+        if "PORT" in os.environ:
+            self.server_port = int(os.environ["PORT"])
 
     def start(self, timeout_seconds: int = 10) -> None:
         if "LLAMA_SERVER_BIN_PATH" in os.environ:
```
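Since `__init__` now reads the environment at construction time, the overrides can be exercised per test with pytest's `monkeypatch` fixture. A hedged sketch (the import path for `ServerProcess` is an assumption):

```python
import pytest
from utils import ServerProcess  # assumed import path for the tests' utils module

def test_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setenv("PORT", "8081")
    monkeypatch.setenv("N_GPU_LAYERS", "23")
    server = ServerProcess()  # environment variables are read in __init__
    assert server.server_port == 8081
    assert server.n_gpu_layer == 23
```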