server : add "tokens" output (#10853)
* server : add "tokens" output
* server : update readme
* server : return token ids only if requested
* tests : improve "tokens" type check
* server : remove "tokens" from the OAI endpoint

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
This commit is contained in:
parent 46828872c3
commit 0e70ba686e

3 changed files with 46 additions and 16 deletions
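In effect, the `/completion` endpoint gains an opt-in `return_tokens` flag: when it is true, the response carries a `tokens` array of integer token ids alongside `content`; when it is false or omitted, `tokens` is an empty list. A minimal client sketch of the non-streaming case follows; the localhost address and the use of the `requests` library are illustrative assumptions, not part of this commit:

import requests

# Sketch: request generated text plus the token ids behind it.
# Assumes a llama-server instance is already listening on localhost:8080.
res = requests.post("http://localhost:8080/completion", json={
    "prompt": "I believe the meaning of life is",
    "n_predict": 8,
    "return_tokens": True,  # opt-in flag introduced by this commit
})
body = res.json()
print(body["content"])  # generated text
print(body["tokens"])   # list of int token ids; [] when return_tokens is off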
@@ -10,16 +10,17 @@ def create_server():
     global server
     server = ServerPreset.tinyllama2()


-@pytest.mark.parametrize("prompt,n_predict,re_content,n_prompt,n_predicted,truncated", [
-    ("I believe the meaning of life is", 8, "(going|bed)+", 18, 8, False),
-    ("Write a joke about AI from a very long prompt which will not be truncated", 256, "(princesses|everyone|kids|Anna|forest)+", 46, 64, False),
+@pytest.mark.parametrize("prompt,n_predict,re_content,n_prompt,n_predicted,truncated,return_tokens", [
+    ("I believe the meaning of life is", 8, "(going|bed)+", 18, 8, False, False),
+    ("Write a joke about AI from a very long prompt which will not be truncated", 256, "(princesses|everyone|kids|Anna|forest)+", 46, 64, False, True),
 ])
-def test_completion(prompt: str, n_predict: int, re_content: str, n_prompt: int, n_predicted: int, truncated: bool):
+def test_completion(prompt: str, n_predict: int, re_content: str, n_prompt: int, n_predicted: int, truncated: bool, return_tokens: bool):
     global server
     server.start()
     res = server.make_request("POST", "/completion", data={
         "n_predict": n_predict,
         "prompt": prompt,
+        "return_tokens": return_tokens,
     })
     assert res.status_code == 200
     assert res.body["timings"]["prompt_n"] == n_prompt
@@ -27,6 +28,11 @@ def test_completion(prompt: str, n_predict: int, re_content: str, n_prompt: int,
     assert res.body["truncated"] == truncated
     assert type(res.body["has_new_line"]) == bool
     assert match_regex(re_content, res.body["content"])
+    if return_tokens:
+        assert len(res.body["tokens"]) > 0
+        assert all(type(tok) == int for tok in res.body["tokens"])
+    else:
+        assert res.body["tokens"] == []


 @pytest.mark.parametrize("prompt,n_predict,re_content,n_prompt,n_predicted,truncated", [
@@ -56,6 +62,8 @@ def test_completion_stream(prompt: str, n_predict: int, re_content: str, n_promp
             assert data["generation_settings"]["seed"] == server.seed
             assert match_regex(re_content, content)
         else:
+            assert len(data["tokens"]) > 0
+            assert all(type(tok) == int for tok in data["tokens"])
             content += data["content"]