From fd4cf34b004fa630e2c5186ee62e51c56c208cfa Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Wed, 18 Dec 2024 17:27:29 +0100 Subject: [PATCH] "top_probs" with "post_sampling_probs" --- examples/server/README.md | 7 ++++++- examples/server/server.cpp | 5 ++++- examples/server/tests/unit/test_completion.py | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/examples/server/README.md b/examples/server/README.md index 73e394cfb..647fa49ab 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -497,7 +497,12 @@ These words will not be included in the completion, so make sure to add them to ``` Please note that if `post_sampling_probs` is set to `true`: - `logprob` will be replace with `prob`, with the value between 0.0 and 1.0 - - Returned number of probabilities may be less than `n_probs` + - `top_logprobs` will be replaced with `top_probs`. Each element inside contains: + - `id`: token ID + - `token`: token in string + - `bytes`: token in bytes + - `prob`: token probability, with the value between 0.0 and 1.0 + - Number of elements in `top_probs` may be less than `n_probs` - `content`: Completion result as a string (excluding `stopping_word` if any). In case of streaming mode, will contain the next token as a string. - `tokens`: Same as `content` but represented as raw token ids. Only populated if `"return_tokens": true` or `"stream": true` in the request. diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 1b20c8e59..a5ac8db76 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -475,7 +475,10 @@ struct completion_token_output { {"id", it.tok}, {"token", tok_str}, {"bytes", str_to_bytes(it.text_to_send)}, - {"top_logprobs", it.to_json(post_sampling_probs)}, + { + post_sampling_probs ? "top_probs" : "top_logprobs", + it.to_json(post_sampling_probs) + }, { post_sampling_probs ? "prob" : "logprob", post_sampling_probs ? 
it.prob : logarithm(it.prob) diff --git a/examples/server/tests/unit/test_completion.py b/examples/server/tests/unit/test_completion.py index f583737ca..24342b3bb 100644 --- a/examples/server/tests/unit/test_completion.py +++ b/examples/server/tests/unit/test_completion.py @@ -327,8 +327,8 @@ def test_n_probs_post_sampling(): assert "token" in tok and type(tok["token"]) == str assert "prob" in tok and 0.0 <= tok["prob"] <= 1.0 assert "bytes" in tok and type(tok["bytes"]) == list - assert len(tok["top_logprobs"]) == 10 - for prob in tok["top_logprobs"]: + assert len(tok["top_probs"]) == 10 + for prob in tok["top_probs"]: assert "id" in prob and prob["id"] > 0 assert "token" in prob and type(prob["token"]) == str assert "prob" in prob and 0.0 <= prob["prob"] <= 1.0