server : add split model test

This commit is contained in:
Xuan Son Nguyen 2024-11-28 14:40:22 +01:00
parent 76b27d29c2
commit ac404be2dc

View file

@@ -32,3 +32,17 @@ def test_server_models():
assert res.status_code == 200 assert res.status_code == 200
assert len(res.body["data"]) == 1 assert len(res.body["data"]) == 1
assert res.body["data"][0]["id"] == server.model_alias assert res.body["data"][0]["id"] == server.model_alias
def test_load_split_model():
    """Check that the server answers a completion request for a split model.

    The shared ``server`` object is configured with the ``ggml-org/models``
    repo and the ``...-00001-of-00003.gguf`` file, then started. A short
    ``/completion`` request must return HTTP 200 and its ``content`` must
    match ``(little|girl)+``.
    """
    global server

    # Only the first shard file is named here; presumably the server finds
    # the remaining shards on its own -- confirm against the model loader.
    server.model_hf_repo = "ggml-org/models"
    server.model_hf_file = "tinyllamas/split/stories15M-q8_0-00001-of-00003.gguf"
    server.model_alias = "tinyllama-split"
    server.start()

    # temperature 0.0 is presumably there to keep the generated text stable
    # enough for the regex check below.
    payload = {
        "n_predict": 16,
        "prompt": "Hello",
        "temperature": 0.0,
    }
    response = server.make_request("POST", "/completion", data=payload)

    assert response.status_code == 200
    assert match_regex("(little|girl)+", response.body["content"])