server : add more test cases (#10569)

* server : add split model test

* add test speculative

* add invalid cases
This commit is contained in:
Xuan Son Nguyen 2024-11-29 21:48:56 +01:00 committed by GitHub
parent 3a8e9af402
commit b782e5c7d4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 186 additions and 1 deletions

View file

@ -32,3 +32,17 @@ def test_server_models():
assert res.status_code == 200
assert len(res.body["data"]) == 1
assert res.body["data"][0]["id"] == server.model_alias
def test_load_split_model():
    """Start the server on a split model file and check a short completion.

    The configured file name ends in ``-00001-of-00003``; presumably this is
    the first of three shards and the server resolves the remaining ones
    itself (TODO confirm against the server's loading code).
    """
    global server

    # Point the shared server preset at the split model before starting it.
    server.model_hf_repo = "ggml-org/models"
    server.model_hf_file = "tinyllamas/split/stories15M-q8_0-00001-of-00003.gguf"
    server.model_alias = "tinyllama-split"
    server.start()

    # Short completion request: 16 tokens requested at temperature 0.0.
    payload = {
        "n_predict": 16,
        "prompt": "Hello",
        "temperature": 0.0,
    }
    response = server.make_request("POST", "/completion", data=payload)

    # Check the status first so a failed request is reported before the
    # response body is inspected.
    assert response.status_code == 200
    generated = response.body["content"]
    assert match_regex("(little|girl)+", generated)