server : fill usage info in embeddings and rerank responses (#10852)
* server : fill usage info in embeddings response
* server : fill usage info in reranking response
This commit is contained in:
parent
382bc7f2e8
commit
05c3a444b8
4 changed files with 77 additions and 10 deletions
|
@ -53,3 +53,26 @@ def test_invalid_rerank_req(documents):
|
|||
})
|
||||
assert res.status_code == 400
|
||||
assert "error" in res.body
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "query,doc1,doc2,n_tokens",
    [
        ("Machine learning is", "A machine", "Learning is", 19),
        ("Which city?", "Machine learning is ", "Paris, capitale de la", 26),
    ]
)
def test_rerank_usage(query, doc1, doc2, n_tokens):
    """Check the ``usage`` block returned by the ``/rerank`` endpoint.

    For each parametrized case the test posts one query with two documents
    and verifies that:

    * the request succeeds with HTTP 200,
    * ``usage.prompt_tokens`` equals ``usage.total_tokens``,
    * ``usage.prompt_tokens`` equals the expected ``n_tokens`` for the case.
    """
    global server
    server.start()

    # Build the request body up front so the call itself stays on one line.
    payload = {
        "query": query,
        "documents": [doc1, doc2],
    }
    res = server.make_request("POST", "/rerank", data=payload)
    assert res.status_code == 200

    usage = res.body['usage']
    # Prompt and total counts are asserted equal before checking the
    # expected per-case value.
    assert usage['prompt_tokens'] == usage['total_tokens']
    assert usage['prompt_tokens'] == n_tokens
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue