server : add "tokens" output (#10853)

* server : add "tokens" output

ggml-ci

* server : update readme

ggml-ci

* server : return tokens ids only if requested

ggml-ci

* tests : improve "tokens" type check

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>

* server : remove "tokens" from the OAI endpoint

ggml-ci

---------

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
This commit is contained in:
Georgi Gerganov 2024-12-18 11:05:29 +02:00 committed by GitHub
parent 46828872c3
commit 0e70ba686e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 46 additions and 16 deletions

View file

@ -10,16 +10,17 @@ def create_server():
global server
server = ServerPreset.tinyllama2()
@pytest.mark.parametrize("prompt,n_predict,re_content,n_prompt,n_predicted,truncated", [
("I believe the meaning of life is", 8, "(going|bed)+", 18, 8, False),
("Write a joke about AI from a very long prompt which will not be truncated", 256, "(princesses|everyone|kids|Anna|forest)+", 46, 64, False),
@pytest.mark.parametrize("prompt,n_predict,re_content,n_prompt,n_predicted,truncated,return_tokens", [
("I believe the meaning of life is", 8, "(going|bed)+", 18, 8, False, False),
("Write a joke about AI from a very long prompt which will not be truncated", 256, "(princesses|everyone|kids|Anna|forest)+", 46, 64, False, True),
])
def test_completion(prompt: str, n_predict: int, re_content: str, n_prompt: int, n_predicted: int, truncated: bool):
def test_completion(prompt: str, n_predict: int, re_content: str, n_prompt: int, n_predicted: int, truncated: bool, return_tokens: bool):
global server
server.start()
res = server.make_request("POST", "/completion", data={
"n_predict": n_predict,
"prompt": prompt,
"return_tokens": return_tokens,
})
assert res.status_code == 200
assert res.body["timings"]["prompt_n"] == n_prompt
@ -27,6 +28,11 @@ def test_completion(prompt: str, n_predict: int, re_content: str, n_prompt: int,
assert res.body["truncated"] == truncated
assert type(res.body["has_new_line"]) == bool
assert match_regex(re_content, res.body["content"])
if return_tokens:
assert len(res.body["tokens"]) > 0
assert all(type(tok) == int for tok in res.body["tokens"])
else:
assert res.body["tokens"] == []
@pytest.mark.parametrize("prompt,n_predict,re_content,n_prompt,n_predicted,truncated", [
@ -56,6 +62,8 @@ def test_completion_stream(prompt: str, n_predict: int, re_content: str, n_promp
assert data["generation_settings"]["seed"] == server.seed
assert match_regex(re_content, content)
else:
assert len(data["tokens"]) > 0
assert all(type(tok) == int for tok in data["tokens"])
content += data["content"]