server : fix logprobs, make it OAI-compatible (#10783)
* server : fix logprobs, make it openai-compatible * update docs * add std::log * return pre-sampling p * sort before apply softmax * add comment * fix test * set p for sampled token * update docs * add --multi-token-probs * update docs * add `post_sampling_probs` option * update docs [no ci] * remove --multi-token-probs * "top_probs" with "post_sampling_probs" * resolve review comments * rename struct token_prob to prob_info * correct comment placement * fix setting prob for sampled token
This commit is contained in:
parent
a3c33b1dce
commit
57bb2c40cd
6 changed files with 396 additions and 107 deletions
|
@ -92,7 +92,6 @@ def test_chat_completion_with_openai_library():
|
|||
seed=42,
|
||||
temperature=0.8,
|
||||
)
|
||||
print(res)
|
||||
assert res.choices[0].finish_reason == "length"
|
||||
assert res.choices[0].message.content is not None
|
||||
assert match_regex("(Suddenly)+", res.choices[0].message.content)
|
||||
|
@ -163,3 +162,64 @@ def test_chat_completion_with_timings_per_token():
|
|||
assert "predicted_per_second" in data["timings"]
|
||||
assert "predicted_n" in data["timings"]
|
||||
assert data["timings"]["predicted_n"] <= 10
|
||||
|
||||
|
||||
def test_logprobs():
|
||||
global server
|
||||
server.start()
|
||||
client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}")
|
||||
res = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo-instruct",
|
||||
temperature=0.0,
|
||||
messages=[
|
||||
{"role": "system", "content": "Book"},
|
||||
{"role": "user", "content": "What is the best book"},
|
||||
],
|
||||
max_tokens=5,
|
||||
logprobs=True,
|
||||
top_logprobs=10,
|
||||
)
|
||||
output_text = res.choices[0].message.content
|
||||
aggregated_text = ''
|
||||
assert res.choices[0].logprobs is not None
|
||||
assert res.choices[0].logprobs.content is not None
|
||||
for token in res.choices[0].logprobs.content:
|
||||
aggregated_text += token.token
|
||||
assert token.logprob <= 0.0
|
||||
assert token.bytes is not None
|
||||
assert len(token.top_logprobs) > 0
|
||||
assert aggregated_text == output_text
|
||||
|
||||
|
||||
def test_logprobs_stream():
|
||||
global server
|
||||
server.start()
|
||||
client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}")
|
||||
res = client.chat.completions.create(
|
||||
model="gpt-3.5-turbo-instruct",
|
||||
temperature=0.0,
|
||||
messages=[
|
||||
{"role": "system", "content": "Book"},
|
||||
{"role": "user", "content": "What is the best book"},
|
||||
],
|
||||
max_tokens=5,
|
||||
logprobs=True,
|
||||
top_logprobs=10,
|
||||
stream=True,
|
||||
)
|
||||
output_text = ''
|
||||
aggregated_text = ''
|
||||
for data in res:
|
||||
choice = data.choices[0]
|
||||
if choice.finish_reason is None:
|
||||
if choice.delta.content:
|
||||
output_text += choice.delta.content
|
||||
assert choice.logprobs is not None
|
||||
assert choice.logprobs.content is not None
|
||||
for token in choice.logprobs.content:
|
||||
aggregated_text += token.token
|
||||
assert token.logprob <= 0.0
|
||||
assert token.bytes is not None
|
||||
assert token.top_logprobs is not None
|
||||
assert len(token.top_logprobs) > 0
|
||||
assert aggregated_text == output_text
|
||||
|
|
|
@ -270,9 +270,68 @@ def test_n_probs():
|
|||
assert "completion_probabilities" in res.body
|
||||
assert len(res.body["completion_probabilities"]) == 5
|
||||
for tok in res.body["completion_probabilities"]:
|
||||
assert "probs" in tok
|
||||
assert len(tok["probs"]) == 10
|
||||
for prob in tok["probs"]:
|
||||
assert "prob" in prob
|
||||
assert "tok_str" in prob
|
||||
assert 0.0 <= prob["prob"] <= 1.0
|
||||
assert "id" in tok and tok["id"] > 0
|
||||
assert "token" in tok and type(tok["token"]) == str
|
||||
assert "logprob" in tok and tok["logprob"] <= 0.0
|
||||
assert "bytes" in tok and type(tok["bytes"]) == list
|
||||
assert len(tok["top_logprobs"]) == 10
|
||||
for prob in tok["top_logprobs"]:
|
||||
assert "id" in prob and prob["id"] > 0
|
||||
assert "token" in prob and type(prob["token"]) == str
|
||||
assert "logprob" in prob and prob["logprob"] <= 0.0
|
||||
assert "bytes" in prob and type(prob["bytes"]) == list
|
||||
|
||||
|
||||
def test_n_probs_stream():
|
||||
global server
|
||||
server.start()
|
||||
res = server.make_stream_request("POST", "/completion", data={
|
||||
"prompt": "I believe the meaning of life is",
|
||||
"n_probs": 10,
|
||||
"temperature": 0.0,
|
||||
"n_predict": 5,
|
||||
"stream": True,
|
||||
})
|
||||
for data in res:
|
||||
if data["stop"] == False:
|
||||
assert "completion_probabilities" in data
|
||||
assert len(data["completion_probabilities"]) == 1
|
||||
for tok in data["completion_probabilities"]:
|
||||
assert "id" in tok and tok["id"] > 0
|
||||
assert "token" in tok and type(tok["token"]) == str
|
||||
assert "logprob" in tok and tok["logprob"] <= 0.0
|
||||
assert "bytes" in tok and type(tok["bytes"]) == list
|
||||
assert len(tok["top_logprobs"]) == 10
|
||||
for prob in tok["top_logprobs"]:
|
||||
assert "id" in prob and prob["id"] > 0
|
||||
assert "token" in prob and type(prob["token"]) == str
|
||||
assert "logprob" in prob and prob["logprob"] <= 0.0
|
||||
assert "bytes" in prob and type(prob["bytes"]) == list
|
||||
|
||||
|
||||
def test_n_probs_post_sampling():
|
||||
global server
|
||||
server.start()
|
||||
res = server.make_request("POST", "/completion", data={
|
||||
"prompt": "I believe the meaning of life is",
|
||||
"n_probs": 10,
|
||||
"temperature": 0.0,
|
||||
"n_predict": 5,
|
||||
"post_sampling_probs": True,
|
||||
})
|
||||
assert res.status_code == 200
|
||||
assert "completion_probabilities" in res.body
|
||||
assert len(res.body["completion_probabilities"]) == 5
|
||||
for tok in res.body["completion_probabilities"]:
|
||||
assert "id" in tok and tok["id"] > 0
|
||||
assert "token" in tok and type(tok["token"]) == str
|
||||
assert "prob" in tok and 0.0 < tok["prob"] <= 1.0
|
||||
assert "bytes" in tok and type(tok["bytes"]) == list
|
||||
assert len(tok["top_probs"]) == 10
|
||||
for prob in tok["top_probs"]:
|
||||
assert "id" in prob and prob["id"] > 0
|
||||
assert "token" in prob and type(prob["token"]) == str
|
||||
assert "prob" in prob and 0.0 <= prob["prob"] <= 1.0
|
||||
assert "bytes" in prob and type(prob["bytes"]) == list
|
||||
# because the test model usually output token with either 100% or 0% probability, we need to check all the top_probs
|
||||
assert any(prob["prob"] == 1.0 for prob in tok["top_probs"])
|
||||
|
|
|
@ -50,6 +50,8 @@ def test_embedding_multiple():
|
|||
@pytest.mark.parametrize(
|
||||
"input,is_multi_prompt",
|
||||
[
|
||||
# do not crash on empty input
|
||||
("", False),
|
||||
# single prompt
|
||||
("string", False),
|
||||
([12, 34, 56], False),
|
||||
|
@ -103,6 +105,7 @@ def test_embedding_pooling_none_oai():
|
|||
|
||||
# /v1/embeddings does not support pooling type 'none'
|
||||
assert res.status_code == 400
|
||||
assert "error" in res.body
|
||||
|
||||
|
||||
def test_embedding_openai_library_single():
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue