server : fill usage info in embeddings and rerank responses (#10852)

* server : fill usage info in embeddings response

* server : fill usage info in reranking response
This commit is contained in:
krystiancha 2024-12-17 16:00:24 +00:00 committed by GitHub
parent 382bc7f2e8
commit 05c3a444b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 77 additions and 10 deletions

View file

@@ -97,3 +97,33 @@ def test_same_prompt_give_same_result():
vi = res.body['data'][i]['embedding']
for x, y in zip(v0, vi):
assert abs(x - y) < EPSILON
@pytest.mark.parametrize(
    "content,n_tokens",
    [
        ("I believe the meaning of life is", 7),
        ("This is a test", 4),
    ]
)
def test_embedding_usage_single(content, n_tokens):
    """POST one input to /embeddings and verify the reported usage.

    The response must carry a ``usage`` object whose ``prompt_tokens``
    equals ``total_tokens`` and matches the expected ``n_tokens`` for
    the given ``content``.
    """
    global server
    server.start()

    payload = {"input": content}
    response = server.make_request("POST", "/embeddings", data=payload)
    assert response.status_code == 200

    usage = response.body['usage']
    # The test expects the total to consist of prompt tokens only.
    assert usage['prompt_tokens'] == usage['total_tokens']
    assert usage['prompt_tokens'] == n_tokens
def test_embedding_usage_multiple():
    """POST two identical inputs to /embeddings and verify summed usage.

    The response's ``usage.prompt_tokens`` must equal ``total_tokens``
    and must be ``2 * 7``, i.e. the count is expected to accumulate over
    both inputs of the batch.
    """
    global server
    server.start()

    prompt = "I believe the meaning of life is"
    payload = {"input": [prompt, prompt]}
    response = server.make_request("POST", "/embeddings", data=payload)
    assert response.status_code == 200

    usage = response.body['usage']
    assert usage['prompt_tokens'] == usage['total_tokens']
    assert usage['prompt_tokens'] == 2 * 7