server : fill usage info in embeddings and rerank responses (#10852)

* server : fill usage info in embeddings response

* server : fill usage info in reranking response
This commit is contained in:
krystiancha 2024-12-17 16:00:24 +00:00 committed by GitHub
parent 382bc7f2e8
commit 05c3a444b8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 77 additions and 10 deletions

View file

@ -53,3 +53,26 @@ def test_invalid_rerank_req(documents):
})
assert res.status_code == 400
assert "error" in res.body
@pytest.mark.parametrize(
"query,doc1,doc2,n_tokens",
[
("Machine learning is", "A machine", "Learning is", 19),
("Which city?", "Machine learning is ", "Paris, capitale de la", 26),
]
)
def test_rerank_usage(query, doc1, doc2, n_tokens):
global server
server.start()
res = server.make_request("POST", "/rerank", data={
"query": query,
"documents": [
doc1,
doc2,
]
})
assert res.status_code == 200
assert res.body['usage']['prompt_tokens'] == res.body['usage']['total_tokens']
assert res.body['usage']['prompt_tokens'] == n_tokens